1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which 'requires' clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No 'requires' clause present.
  OMP_REQ_NONE                    = 0x001,
  /// 'reverse_offload' clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// 'unified_address' clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// 'unified_shared_memory' clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// 'dynamic_allocators' clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device id values.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
/// Emit initialization of the private reduction copy at \p Private for the
/// user-defined reduction \p DRD.
///
/// If \p DRD has an initializer, \p InitOp is emitted with its two argument
/// variables mapped to \p Private and \p Original and its opaque callee
/// mapped to the second function returned by getUserDefinedReduction().
/// Otherwise \p Private is initialized from a null constant of type \p Ty.
///
/// \param DRD Declare-reduction declaration; dereferenced unconditionally.
/// \param InitOp Initializer call expression; only used when \p DRD has an
/// initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original variable.
/// \param Ty Type of the initialized value.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp is expected to be a call through an opaque callee whose two
    // arguments are unary operators applied to variable references.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the first argument's variable to the private copy and the
    // second one's to the original variable.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second function of the pair, then emit
    // the call, discarding its result.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a null constant of the type in a private
    // constant global and initialize the private copy from it.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Form the initial value according to the evaluation kind of the type.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Store the value into the private copy through an opaque value
    // expression bound to it.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
662 
/// Emit initialization of arrays of complex types.
///
/// Emits a loop that initializes the destination array element by element.
/// When \p DRD is non-null, the source array is walked in lockstep with the
/// destination.
///
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into each element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. \p SrcAddr is only
/// used when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop: skip the body entirely if
  // the array is empty, otherwise iterate until the end pointer is reached.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the current source element; only created when DRD is non-null.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI for the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): this source GEP reuses the "dest" value name; it looks
    // like a copy-paste. Confirm nothing matches on the IR name before
    // renaming.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
/// Emits the initialization of the private copy of reduction item \p N.
///
/// \param CGF Code generation state used to emit the initialization.
/// \param N Index of the reduction item; its shared lvalue must already have
/// been generated.
/// \param PrivateAddr Address of the private copy. It is re-cast to the memory
/// type of the private variable before use.
/// \param SharedLVal LValue of the shared item. It is rebuilt using the type,
/// base info and TBAA info of the shared address recorded for item \p N.
/// \param DefaultInit Callback that performs default initialization. Its
/// result is only inspected on the last branch: when it returns false and the
/// private variable has a non-trivial initializer, that initializer is
/// emitted into the private copy.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // View the private storage through the private variable's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  // Likewise view the shared storage through the recorded shared type.
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: DefaultInit runs only when the declare-reduction has an
    // initializer; the elements are then initialized by the aggregate helper.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item handled by the declare-reduction initializer path.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit is always invoked here (it is the first operand); the
    // private variable's own initializer is emitted only if it returned false.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
946                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
947                           llvm::Value *Addr) {
948   Address Tmp = Address::invalid();
949   Address TopTmp = Address::invalid();
950   Address MostTopTmp = Address::invalid();
951   BaseTy = BaseTy.getNonReferenceType();
952   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
953          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
954     Tmp = CGF.CreateMemTemp(BaseTy);
955     if (TopTmp.isValid())
956       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
957     else
958       MostTopTmp = Tmp;
959     TopTmp = Tmp;
960     BaseTy = BaseTy->getPointeeType();
961   }
962   llvm::Type *Ty = BaseLVType;
963   if (Tmp.isValid())
964     Ty = Tmp.getElementType();
965   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
966   if (Tmp.isValid()) {
967     CGF.Builder.CreateStore(Addr, Tmp);
968     return MostTopTmp;
969   }
970   return Address(Addr, BaseLVAlignment);
971 }
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1063                                  StringRef Separator)
1064     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1065       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1066   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1067 
1068   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1069   OMPBuilder.initialize();
1070   loadOffloadInfoMetadata();
1071 }
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emits an internal helper function for a user-defined reduction: either the
/// combiner or the initializer, depending on \p IsCombiner.
///
/// \param CGM Module in which the function is created.
/// \param Ty Reduction type; both parameters are restrict-qualified pointers
/// to it.
/// \param CombinerInitializer Expression emitted (with its result ignored) as
/// the function body; may be null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second ("in") parameter.
/// \param Out Variable mapped to the pointee of the first ("out") parameter.
/// \param IsCombiner Selects the function name and, when false, enables
/// emission of \p Out's own non-trivial initializer before the body.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note: the "out" parameter is pushed first, so it is the first argument.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace any noinline/optnone attributes with
  // alwaysinline on the helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first run the non-trivial initializer attached to the
  // "out" variable itself, writing into its (privatized) storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
/// Emits the outlined function for a task or taskloop based directive.
///
/// \param D Directive whose 'task' or 'taskloop' captured statement is
/// outlined (taskloop is chosen when D is a taskloop directive).
/// \param ThreadIDVar Thread-id variable; must NOT have pointer type.
/// \param PartIDVar Part-id variable handed to the untied-task action.
/// \param TaskTVar Pointer-typed variable whose pointee is passed as the task
/// argument to __kmpc_omp_task by the untied-task code generator.
/// \param InnermostKind Forwarded to the task region info.
/// \param CodeGen Region body generator; the untied-task action is installed
/// on it before the function is generated.
/// \param Tied Whether the task is tied; forwarded to the action.
/// \param NumberOfParts Output; written only when \p Tied is false, with the
/// number of parts reported by the action.
/// \returns The generated outlined function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread_id, task) for the current task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  // Install the action before the region info is created so that it is in
  // effect while the body is generated.
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Take the cancel flag from the concrete directive class, if it is one of
  // the kinds checked here; otherwise it stays false.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Return the OpenMP global thread id to use at the current emission point.
///
/// The value comes from the first source that applies:
///  1. the OMPBuilder, when the OpenMPIRBuilder language option is set;
///  2. a thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap;
///  3. a load of the region's thread id variable, when the checks below allow
///     it;
///  4. a call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is then cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that holds the alloca insert point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable only if at least one of these holds:
      //  - no landing pad is required, or Exceptions/CXXExceptions is off;
      //  - the builder is currently emitting into TopBlock;
      //  - the variable's pointer is not an instruction;
      //  - the pointer is an instruction located in TopBlock or in the block
      //    currently being emitted.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // Either this is not an outlined function region or the thread id variable
  // could not be used above - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Create the service insert point on first use.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point rather than at the
  // builder's current position.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529 }
1530 
1531 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1532   return OMPBuilder.IdentPtr;
1533 }
1534 
1535 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1536   if (!Kmpc_MicroTy) {
1537     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1538     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1539                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1540     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1541   }
1542   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1543 }
1544 
1545 llvm::FunctionCallee
1546 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1547   assert((IVSize == 32 || IVSize == 64) &&
1548          "IV size is not compatible with the omp runtime");
1549   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1550                                             : "__kmpc_for_static_init_4u")
1551                                 : (IVSigned ? "__kmpc_for_static_init_8"
1552                                             : "__kmpc_for_static_init_8u");
1553   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1554   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1555   llvm::Type *TypeParams[] = {
1556     getIdentTyPointerTy(),                     // loc
1557     CGM.Int32Ty,                               // tid
1558     CGM.Int32Ty,                               // schedtype
1559     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1560     PtrTy,                                     // p_lower
1561     PtrTy,                                     // p_upper
1562     PtrTy,                                     // p_stride
1563     ITy,                                       // incr
1564     ITy                                        // chunk
1565   };
1566   auto *FnTy =
1567       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1568   return CGM.CreateRuntimeFunction(FnTy, Name);
1569 }
1570 
1571 llvm::FunctionCallee
1572 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1573   assert((IVSize == 32 || IVSize == 64) &&
1574          "IV size is not compatible with the omp runtime");
1575   StringRef Name =
1576       IVSize == 32
1577           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1578           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1579   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1581                                CGM.Int32Ty,           // tid
1582                                CGM.Int32Ty,           // schedtype
1583                                ITy,                   // lower
1584                                ITy,                   // upper
1585                                ITy,                   // stride
1586                                ITy                    // chunk
1587   };
1588   auto *FnTy =
1589       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1590   return CGM.CreateRuntimeFunction(FnTy, Name);
1591 }
1592 
1593 llvm::FunctionCallee
1594 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1595   assert((IVSize == 32 || IVSize == 64) &&
1596          "IV size is not compatible with the omp runtime");
1597   StringRef Name =
1598       IVSize == 32
1599           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1600           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1601   llvm::Type *TypeParams[] = {
1602       getIdentTyPointerTy(), // loc
1603       CGM.Int32Ty,           // tid
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1617           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1618   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1619   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1620   llvm::Type *TypeParams[] = {
1621     getIdentTyPointerTy(),                     // loc
1622     CGM.Int32Ty,                               // tid
1623     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1624     PtrTy,                                     // p_lower
1625     PtrTy,                                     // p_upper
1626     PtrTy                                      // p_stride
1627   };
1628   auto *FnTy =
1629       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1630   return CGM.CreateRuntimeFunction(FnTy, Name);
1631 }
1632 
1633 /// Obtain information that uniquely identifies a target entry. This
1634 /// consists of the file and device IDs as well as line number associated with
1635 /// the relevant entry source location.
1636 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1637                                      unsigned &DeviceID, unsigned &FileID,
1638                                      unsigned &LineNum) {
1639   SourceManager &SM = C.getSourceManager();
1640 
1641   // The loc should be always valid and have a file ID (the user cannot use
1642   // #pragma directives in macros)
1643 
1644   assert(Loc.isValid() && "Source location is expected to be always valid.");
1645 
1646   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1647   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1648 
1649   llvm::sys::fs::UniqueID ID;
1650   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1651     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1652         << PLoc.getFilename() << EC.message();
1653 
1654   DeviceID = ID.getDevice();
1655   FileID = ID.getFile();
1656   LineNum = PLoc.getLine();
1657 }
1658 
1659 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1660   if (CGM.getLangOpts().OpenMPSimd)
1661     return Address::invalid();
1662   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1663       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1664   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1665               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1666                HasRequiresUnifiedSharedMemory))) {
1667     SmallString<64> PtrName;
1668     {
1669       llvm::raw_svector_ostream OS(PtrName);
1670       OS << CGM.getMangledName(GlobalDecl(VD));
1671       if (!VD->isExternallyVisible()) {
1672         unsigned DeviceID, FileID, Line;
1673         getTargetEntryUniqueInfo(CGM.getContext(),
1674                                  VD->getCanonicalDecl()->getBeginLoc(),
1675                                  DeviceID, FileID, Line);
1676         OS << llvm::format("_%x", FileID);
1677       }
1678       OS << "_decl_tgt_ref_ptr";
1679     }
1680     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1681     if (!Ptr) {
1682       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1683       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1684                                         PtrName);
1685 
1686       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1687       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1688 
1689       if (!CGM.getLangOpts().OpenMPIsDevice)
1690         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1691       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1692     }
1693     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1694   }
1695   return Address::invalid();
1696 }
1697 
1698 llvm::Constant *
1699 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1700   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1701          !CGM.getContext().getTargetInfo().isTLSSupported());
1702   // Lookup the entry, lazily creating it if necessary.
1703   std::string Suffix = getName({"cache", ""});
1704   return getOrCreateInternalVariable(
1705       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1706 }
1707 
1708 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1709                                                 const VarDecl *VD,
1710                                                 Address VDAddr,
1711                                                 SourceLocation Loc) {
1712   if (CGM.getLangOpts().OpenMPUseTLS &&
1713       CGM.getContext().getTargetInfo().isTLSSupported())
1714     return VDAddr;
1715 
1716   llvm::Type *VarTy = VDAddr.getElementType();
1717   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1718                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1719                                                        CGM.Int8PtrTy),
1720                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1721                          getOrCreateThreadPrivateCache(VD)};
1722   return Address(CGF.EmitRuntimeCall(
1723                      OMPBuilder.getOrCreateRuntimeFunction(
1724                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1725                      Args),
1726                  VDAddr.getAlignment());
1727 }
1728 
1729 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1730     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1731     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1732   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1733   // library.
1734   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1735   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1736                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1737                       OMPLoc);
1738   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1739   // to register constructor/destructor for variable.
1740   llvm::Value *Args[] = {
1741       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1742       Ctor, CopyCtor, Dtor};
1743   CGF.EmitRuntimeCall(
1744       OMPBuilder.getOrCreateRuntimeFunction(
1745           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1746       Args);
1747 }
1748 
/// Emit the constructor/destructor helper functions for the threadprivate
/// variable \p VD and the code that registers them with the runtime.
///
/// Nothing is emitted and nullptr is returned when TLS is used for
/// threadprivate variables, when \p VD has no definition, when the definition
/// was already handled (tracked by mangled name in
/// ThreadPrivateWithDefinition), or when neither a constructor nor a
/// destructor helper is needed.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit If true (and compiling C++), a constructor helper that
///                    re-emits the initializer is generated.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function and nullptr is returned. If null, a new
///                    function containing the registration calls is created
///                    and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS-based threadprivate variables no runtime registration is done
  // here.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Handle each definition only once; insert() reports whether the mangled
  // name is new.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as the variable's memory type, using the
      // alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // No caller-provided function: wrap the registration in a new nullary
    // function and hand that back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1868 
/// Create and register the constructor/destructor offload entries for the
/// declare target variable \p VD whose global is \p Addr.
///
/// Returns false when there are no offload target triples and this is not a
/// device compilation. In every other case the return value is the
/// OpenMPIsDevice language option; that includes the early exits for
/// variables that are not declare target, are mapped 'link', are mapped 'to'
/// under HasRequiresUnifiedSharedMemory, or were already handled.
///
/// \param VD          The declare target variable.
/// \param Addr        Global variable the generated ctor/dtor operate on.
/// \param PerformInit If true (and compiling C++), a constructor entry is
///                    created.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each definition only once; insert() reports whether the mangled
  // name is new.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // NOTE(review): the literal ends with '_' and the first format string
    // starts with '_', so the prefix reads "__omp_offloading__<device>_...".
    // Confirm the double underscore is intended.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the declaration's initializer into
      // the variable at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the generated function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Non-device compilation: a private constant i8 global named
      // "<prefix>_ctor" stands in for the entry instead of a function.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the variable
      // at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the generated function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Non-device compilation: a private constant i8 global named
      // "<prefix>_dtor" stands in for the entry instead of a function.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1983 
1984 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1985                                                           QualType VarType,
1986                                                           StringRef Name) {
1987   std::string Suffix = getName({"artificial", ""});
1988   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1989   llvm::Value *GAddr =
1990       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1991   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1992       CGM.getTarget().isTLSSupported()) {
1993     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1994     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1995   }
1996   std::string CacheSuffix = getName({"cache", ""});
1997   llvm::Value *Args[] = {
1998       emitUpdateLocation(CGF, SourceLocation()),
1999       getThreadID(CGF, SourceLocation()),
2000       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2001       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2002                                 /*isSigned=*/false),
2003       getOrCreateInternalVariable(
2004           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2005   return Address(
2006       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2007           CGF.EmitRuntimeCall(
2008               OMPBuilder.getOrCreateRuntimeFunction(
2009                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2010               Args),
2011           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2012       CGM.getContext().getTypeAlignInChars(VarType));
2013 }
2014 
2015 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2016                                    const RegionCodeGenTy &ThenGen,
2017                                    const RegionCodeGenTy &ElseGen) {
2018   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2019 
2020   // If the condition constant folds and can be elided, try to avoid emitting
2021   // the condition and the dead arm of the if/else.
2022   bool CondConstant;
2023   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2024     if (CondConstant)
2025       ThenGen(CGF);
2026     else
2027       ElseGen(CGF);
2028     return;
2029   }
2030 
2031   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2032   // emit the conditional branch.
2033   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2034   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2035   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2036   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2037 
2038   // Emit the 'then' code.
2039   CGF.EmitBlock(ThenBlock);
2040   ThenGen(CGF);
2041   CGF.EmitBranch(ContBlock);
2042   // Emit the 'else' code if present.
2043   // There is no need to emit line number for unconditional branch.
2044   (void)ApplyDebugLocation::CreateEmpty(CGF);
2045   CGF.EmitBlock(ElseBlock);
2046   ElseGen(CGF);
2047   // There is no need to emit line number for unconditional branch.
2048   (void)ApplyDebugLocation::CreateEmpty(CGF);
2049   CGF.EmitBranch(ContBlock);
2050   // Emit the continuation block for code after the if.
2051   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2052 }
2053 
2054 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2055                                        llvm::Function *OutlinedFn,
2056                                        ArrayRef<llvm::Value *> CapturedVars,
2057                                        const Expr *IfCond) {
2058   if (!CGF.HaveInsertPoint())
2059     return;
2060   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2061   auto &M = CGM.getModule();
2062   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2063                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2064     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2065     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2066     llvm::Value *Args[] = {
2067         RTLoc,
2068         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2069         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2070     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2071     RealArgs.append(std::begin(Args), std::end(Args));
2072     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2073 
2074     llvm::FunctionCallee RTLFn =
2075         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2076     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2077   };
2078   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2079                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2080     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2081     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2082     // Build calls:
2083     // __kmpc_serialized_parallel(&Loc, GTid);
2084     llvm::Value *Args[] = {RTLoc, ThreadID};
2085     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2086                             M, OMPRTL___kmpc_serialized_parallel),
2087                         Args);
2088 
2089     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2090     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2091     Address ZeroAddrBound =
2092         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2093                                          /*Name=*/".bound.zero.addr");
2094     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2095     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2096     // ThreadId for serialized parallels is 0.
2097     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2098     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2099     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2100     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2101 
2102     // __kmpc_end_serialized_parallel(&Loc, GTid);
2103     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2104     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2105                             M, OMPRTL___kmpc_end_serialized_parallel),
2106                         EndArgs);
2107   };
2108   if (IfCond) {
2109     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2110   } else {
2111     RegionCodeGenTy ThenRCG(ThenGen);
2112     ThenRCG(CGF);
2113   }
2114 }
2115 
2116 // If we're inside an (outlined) parallel region, use the region info's
2117 // thread-ID variable (it is passed in a first argument of the outlined function
2118 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2119 // regular serial code region, get thread ID by calling kmp_int32
2120 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2121 // return the address of that temp.
2122 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2123                                              SourceLocation Loc) {
2124   if (auto *OMPRegionInfo =
2125           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2126     if (OMPRegionInfo->getThreadIDVariable())
2127       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2128 
2129   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2130   QualType Int32Ty =
2131       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2132   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2133   CGF.EmitStoreOfScalar(ThreadID,
2134                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2135 
2136   return ThreadIDTemp;
2137 }
2138 
2139 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2140     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2141   SmallString<256> Buffer;
2142   llvm::raw_svector_ostream Out(Buffer);
2143   Out << Name;
2144   StringRef RuntimeName = Out.str();
2145   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2146   if (Elem.second) {
2147     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2148            "OMP internal variable has different type than requested");
2149     return &*Elem.second;
2150   }
2151 
2152   return Elem.second = new llvm::GlobalVariable(
2153              CGM.getModule(), Ty, /*IsConstant*/ false,
2154              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2155              Elem.first(), /*InsertBefore=*/nullptr,
2156              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2157 }
2158 
2159 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2160   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2161   std::string Name = getName({Prefix, "var"});
2162   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2163 }
2164 
2165 namespace {
2166 /// Common pre(post)-action for different OpenMP constructs.
2167 class CommonActionTy final : public PrePostActionTy {
2168   llvm::FunctionCallee EnterCallee;
2169   ArrayRef<llvm::Value *> EnterArgs;
2170   llvm::FunctionCallee ExitCallee;
2171   ArrayRef<llvm::Value *> ExitArgs;
2172   bool Conditional;
2173   llvm::BasicBlock *ContBlock = nullptr;
2174 
2175 public:
2176   CommonActionTy(llvm::FunctionCallee EnterCallee,
2177                  ArrayRef<llvm::Value *> EnterArgs,
2178                  llvm::FunctionCallee ExitCallee,
2179                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2180       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2181         ExitArgs(ExitArgs), Conditional(Conditional) {}
2182   void Enter(CodeGenFunction &CGF) override {
2183     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2184     if (Conditional) {
2185       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2186       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2187       ContBlock = CGF.createBasicBlock("omp_if.end");
2188       // Generate the branch (If-stmt)
2189       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2190       CGF.EmitBlock(ThenBlock);
2191     }
2192   }
2193   void Done(CodeGenFunction &CGF) {
2194     // Emit the rest of blocks/branches
2195     CGF.EmitBranch(ContBlock);
2196     CGF.EmitBlock(ContBlock, true);
2197   }
2198   void Exit(CodeGenFunction &CGF) override {
2199     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2200   }
2201 };
2202 } // anonymous namespace
2203 
2204 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2205                                          StringRef CriticalName,
2206                                          const RegionCodeGenTy &CriticalOpGen,
2207                                          SourceLocation Loc, const Expr *Hint) {
2208   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2209   // CriticalOpGen();
2210   // __kmpc_end_critical(ident_t *, gtid, Lock);
2211   // Prepare arguments and build a call to __kmpc_critical
2212   if (!CGF.HaveInsertPoint())
2213     return;
2214   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2215                          getCriticalRegionLock(CriticalName)};
2216   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2217                                                 std::end(Args));
2218   if (Hint) {
2219     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2220         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2221   }
2222   CommonActionTy Action(
2223       OMPBuilder.getOrCreateRuntimeFunction(
2224           CGM.getModule(),
2225           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2226       EnterArgs,
2227       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2228                                             OMPRTL___kmpc_end_critical),
2229       Args);
2230   CriticalOpGen.setAction(Action);
2231   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2232 }
2233 
2234 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2235                                        const RegionCodeGenTy &MasterOpGen,
2236                                        SourceLocation Loc) {
2237   if (!CGF.HaveInsertPoint())
2238     return;
2239   // if(__kmpc_master(ident_t *, gtid)) {
2240   //   MasterOpGen();
2241   //   __kmpc_end_master(ident_t *, gtid);
2242   // }
2243   // Prepare arguments and build a call to __kmpc_master
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2245   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2246                             CGM.getModule(), OMPRTL___kmpc_master),
2247                         Args,
2248                         OMPBuilder.getOrCreateRuntimeFunction(
2249                             CGM.getModule(), OMPRTL___kmpc_end_master),
2250                         Args,
2251                         /*Conditional=*/true);
2252   MasterOpGen.setAction(Action);
2253   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2254   Action.Done(CGF);
2255 }
2256 
2257 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2258                                         SourceLocation Loc) {
2259   if (!CGF.HaveInsertPoint())
2260     return;
2261   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2262     OMPBuilder.CreateTaskyield(CGF.Builder);
2263   } else {
2264     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2265     llvm::Value *Args[] = {
2266         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2267         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2268     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2269                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2270                         Args);
2271   }
2272 
2273   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2274     Region->emitUntiedSwitch(CGF);
2275 }
2276 
2277 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2278                                           const RegionCodeGenTy &TaskgroupOpGen,
2279                                           SourceLocation Loc) {
2280   if (!CGF.HaveInsertPoint())
2281     return;
2282   // __kmpc_taskgroup(ident_t *, gtid);
2283   // TaskgroupOpGen();
2284   // __kmpc_end_taskgroup(ident_t *, gtid);
2285   // Prepare arguments and build a call to __kmpc_taskgroup
2286   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2287   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2289                         Args,
2290                         OMPBuilder.getOrCreateRuntimeFunction(
2291                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2292                         Args);
2293   TaskgroupOpGen.setAction(Action);
2294   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2295 }
2296 
2297 /// Given an array of pointers to variables, project the address of a
2298 /// given variable.
2299 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2300                                       unsigned Index, const VarDecl *Var) {
2301   // Pull out the pointer to the variable.
2302   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2303   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2304 
2305   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2306   Addr = CGF.Builder.CreateElementBitCast(
2307       Addr, CGF.ConvertTypeForMem(Var->getType()));
2308   return Addr;
2309 }
2310 
2311 static llvm::Value *emitCopyprivateCopyFunction(
2312     CodeGenModule &CGM, llvm::Type *ArgsType,
2313     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2314     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2315     SourceLocation Loc) {
2316   ASTContext &C = CGM.getContext();
2317   // void copy_func(void *LHSArg, void *RHSArg);
2318   FunctionArgList Args;
2319   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2320                            ImplicitParamDecl::Other);
2321   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2322                            ImplicitParamDecl::Other);
2323   Args.push_back(&LHSArg);
2324   Args.push_back(&RHSArg);
2325   const auto &CGFI =
2326       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2327   std::string Name =
2328       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2329   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2330                                     llvm::GlobalValue::InternalLinkage, Name,
2331                                     &CGM.getModule());
2332   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2333   Fn->setDoesNotRecurse();
2334   CodeGenFunction CGF(CGM);
2335   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2336   // Dest = (void*[n])(LHSArg);
2337   // Src = (void*[n])(RHSArg);
2338   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2339       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2340       ArgsType), CGF.getPointerAlign());
2341   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2342       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2343       ArgsType), CGF.getPointerAlign());
2344   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2345   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2346   // ...
2347   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2348   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2349     const auto *DestVar =
2350         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2351     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2352 
2353     const auto *SrcVar =
2354         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2355     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2356 
2357     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2358     QualType Type = VD->getType();
2359     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2360   }
2361   CGF.FinishFunction();
2362   return Fn;
2363 }
2364 
2365 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2366                                        const RegionCodeGenTy &SingleOpGen,
2367                                        SourceLocation Loc,
2368                                        ArrayRef<const Expr *> CopyprivateVars,
2369                                        ArrayRef<const Expr *> SrcExprs,
2370                                        ArrayRef<const Expr *> DstExprs,
2371                                        ArrayRef<const Expr *> AssignmentOps) {
2372   if (!CGF.HaveInsertPoint())
2373     return;
2374   assert(CopyprivateVars.size() == SrcExprs.size() &&
2375          CopyprivateVars.size() == DstExprs.size() &&
2376          CopyprivateVars.size() == AssignmentOps.size());
2377   ASTContext &C = CGM.getContext();
2378   // int32 did_it = 0;
2379   // if(__kmpc_single(ident_t *, gtid)) {
2380   //   SingleOpGen();
2381   //   __kmpc_end_single(ident_t *, gtid);
2382   //   did_it = 1;
2383   // }
2384   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2385   // <copy_func>, did_it);
2386 
2387   Address DidIt = Address::invalid();
2388   if (!CopyprivateVars.empty()) {
2389     // int32 did_it = 0;
2390     QualType KmpInt32Ty =
2391         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2392     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2393     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2394   }
2395   // Prepare arguments and build a call to __kmpc_single
2396   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2397   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2398                             CGM.getModule(), OMPRTL___kmpc_single),
2399                         Args,
2400                         OMPBuilder.getOrCreateRuntimeFunction(
2401                             CGM.getModule(), OMPRTL___kmpc_end_single),
2402                         Args,
2403                         /*Conditional=*/true);
2404   SingleOpGen.setAction(Action);
2405   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2406   if (DidIt.isValid()) {
2407     // did_it = 1;
2408     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2409   }
2410   Action.Done(CGF);
2411   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2412   // <copy_func>, did_it);
2413   if (DidIt.isValid()) {
2414     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2415     QualType CopyprivateArrayTy = C.getConstantArrayType(
2416         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2417         /*IndexTypeQuals=*/0);
2418     // Create a list of all private variables for copyprivate.
2419     Address CopyprivateList =
2420         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2421     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2422       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2423       CGF.Builder.CreateStore(
2424           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2425               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2426               CGF.VoidPtrTy),
2427           Elem);
2428     }
2429     // Build function that copies private values from single region to all other
2430     // threads in the corresponding parallel region.
2431     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2432         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2433         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2434     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2435     Address CL =
2436       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2437                                                       CGF.VoidPtrTy);
2438     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2439     llvm::Value *Args[] = {
2440         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2441         getThreadID(CGF, Loc),        // i32 <gtid>
2442         BufSize,                      // size_t <buf_size>
2443         CL.getPointer(),              // void *<copyprivate list>
2444         CpyFn,                        // void (*) (void *, void *) <copy_func>
2445         DidItVal                      // i32 did_it
2446     };
2447     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2448                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2449                         Args);
2450   }
2451 }
2452 
2453 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2454                                         const RegionCodeGenTy &OrderedOpGen,
2455                                         SourceLocation Loc, bool IsThreads) {
2456   if (!CGF.HaveInsertPoint())
2457     return;
2458   // __kmpc_ordered(ident_t *, gtid);
2459   // OrderedOpGen();
2460   // __kmpc_end_ordered(ident_t *, gtid);
2461   // Prepare arguments and build a call to __kmpc_ordered
2462   if (IsThreads) {
2463     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2464     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2465                               CGM.getModule(), OMPRTL___kmpc_ordered),
2466                           Args,
2467                           OMPBuilder.getOrCreateRuntimeFunction(
2468                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2469                           Args);
2470     OrderedOpGen.setAction(Action);
2471     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2472     return;
2473   }
2474   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2475 }
2476 
2477 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2478   unsigned Flags;
2479   if (Kind == OMPD_for)
2480     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2481   else if (Kind == OMPD_sections)
2482     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2483   else if (Kind == OMPD_single)
2484     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2485   else if (Kind == OMPD_barrier)
2486     Flags = OMP_IDENT_BARRIER_EXPL;
2487   else
2488     Flags = OMP_IDENT_BARRIER_IMPL;
2489   return Flags;
2490 }
2491 
2492 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2493     CodeGenFunction &CGF, const OMPLoopDirective &S,
2494     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2495   // Check if the loop directive is actually a doacross loop directive. In this
2496   // case choose static, 1 schedule.
2497   if (llvm::any_of(
2498           S.getClausesOfKind<OMPOrderedClause>(),
2499           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2500     ScheduleKind = OMPC_SCHEDULE_static;
2501     // Chunk size is 1 in this case.
2502     llvm::APInt ChunkSize(32, 1);
2503     ChunkExpr = IntegerLiteral::Create(
2504         CGF.getContext(), ChunkSize,
2505         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2506         SourceLocation());
2507   }
2508 }
2509 
2510 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2511                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2512                                       bool ForceSimpleCall) {
2513   // Check if we should use the OMPBuilder
2514   auto *OMPRegionInfo =
2515       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2516   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2517     CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
2518         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2519     return;
2520   }
2521 
2522   if (!CGF.HaveInsertPoint())
2523     return;
2524   // Build call __kmpc_cancel_barrier(loc, thread_id);
2525   // Build call __kmpc_barrier(loc, thread_id);
2526   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2527   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2528   // thread_id);
2529   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2530                          getThreadID(CGF, Loc)};
2531   if (OMPRegionInfo) {
2532     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2533       llvm::Value *Result = CGF.EmitRuntimeCall(
2534           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2535                                                 OMPRTL___kmpc_cancel_barrier),
2536           Args);
2537       if (EmitChecks) {
2538         // if (__kmpc_cancel_barrier()) {
2539         //   exit from construct;
2540         // }
2541         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2542         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2543         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2544         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2545         CGF.EmitBlock(ExitBB);
2546         //   exit from construct;
2547         CodeGenFunction::JumpDest CancelDestination =
2548             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2549         CGF.EmitBranchThroughCleanup(CancelDestination);
2550         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2551       }
2552       return;
2553     }
2554   }
2555   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2556                           CGM.getModule(), OMPRTL___kmpc_barrier),
2557                       Args);
2558 }
2559 
2560 /// Map the OpenMP loop schedule to the runtime enumeration.
2561 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2562                                           bool Chunked, bool Ordered) {
2563   switch (ScheduleKind) {
2564   case OMPC_SCHEDULE_static:
2565     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2566                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2567   case OMPC_SCHEDULE_dynamic:
2568     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2569   case OMPC_SCHEDULE_guided:
2570     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2571   case OMPC_SCHEDULE_runtime:
2572     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2573   case OMPC_SCHEDULE_auto:
2574     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2575   case OMPC_SCHEDULE_unknown:
2576     assert(!Chunked && "chunk was specified but schedule kind not known");
2577     return Ordered ? OMP_ord_static : OMP_sch_static;
2578   }
2579   llvm_unreachable("Unexpected runtime schedule");
2580 }
2581 
2582 /// Map the OpenMP distribute schedule to the runtime enumeration.
2583 static OpenMPSchedType
2584 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2585   // only static is allowed for dist_schedule
2586   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2587 }
2588 
2589 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2590                                          bool Chunked) const {
2591   OpenMPSchedType Schedule =
2592       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2593   return Schedule == OMP_sch_static;
2594 }
2595 
2596 bool CGOpenMPRuntime::isStaticNonchunked(
2597     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2598   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2599   return Schedule == OMP_dist_sch_static;
2600 }
2601 
2602 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2603                                       bool Chunked) const {
2604   OpenMPSchedType Schedule =
2605       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2606   return Schedule == OMP_sch_static_chunked;
2607 }
2608 
2609 bool CGOpenMPRuntime::isStaticChunked(
2610     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2611   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2612   return Schedule == OMP_dist_sch_static_chunked;
2613 }
2614 
2615 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2616   OpenMPSchedType Schedule =
2617       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2618   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2619   return Schedule != OMP_sch_static;
2620 }
2621 
2622 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2623                                   OpenMPScheduleClauseModifier M1,
2624                                   OpenMPScheduleClauseModifier M2) {
2625   int Modifier = 0;
2626   switch (M1) {
2627   case OMPC_SCHEDULE_MODIFIER_monotonic:
2628     Modifier = OMP_sch_modifier_monotonic;
2629     break;
2630   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2631     Modifier = OMP_sch_modifier_nonmonotonic;
2632     break;
2633   case OMPC_SCHEDULE_MODIFIER_simd:
2634     if (Schedule == OMP_sch_static_chunked)
2635       Schedule = OMP_sch_static_balanced_chunked;
2636     break;
2637   case OMPC_SCHEDULE_MODIFIER_last:
2638   case OMPC_SCHEDULE_MODIFIER_unknown:
2639     break;
2640   }
2641   switch (M2) {
2642   case OMPC_SCHEDULE_MODIFIER_monotonic:
2643     Modifier = OMP_sch_modifier_monotonic;
2644     break;
2645   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2646     Modifier = OMP_sch_modifier_nonmonotonic;
2647     break;
2648   case OMPC_SCHEDULE_MODIFIER_simd:
2649     if (Schedule == OMP_sch_static_chunked)
2650       Schedule = OMP_sch_static_balanced_chunked;
2651     break;
2652   case OMPC_SCHEDULE_MODIFIER_last:
2653   case OMPC_SCHEDULE_MODIFIER_unknown:
2654     break;
2655   }
2656   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2657   // If the static schedule kind is specified or if the ordered clause is
2658   // specified, and if the nonmonotonic modifier is not specified, the effect is
2659   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2660   // modifier is specified, the effect is as if the nonmonotonic modifier is
2661   // specified.
2662   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2663     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2664           Schedule == OMP_sch_static_balanced_chunked ||
2665           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2666           Schedule == OMP_dist_sch_static_chunked ||
2667           Schedule == OMP_dist_sch_static))
2668       Modifier = OMP_sch_modifier_nonmonotonic;
2669   }
2670   return Schedule | Modifier;
2671 }
2672 
2673 void CGOpenMPRuntime::emitForDispatchInit(
2674     CodeGenFunction &CGF, SourceLocation Loc,
2675     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2676     bool Ordered, const DispatchRTInput &DispatchValues) {
2677   if (!CGF.HaveInsertPoint())
2678     return;
2679   OpenMPSchedType Schedule = getRuntimeSchedule(
2680       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2681   assert(Ordered ||
2682          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2683           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2684           Schedule != OMP_sch_static_balanced_chunked));
2685   // Call __kmpc_dispatch_init(
2686   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2687   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2688   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2689 
2690   // If the Chunk was not specified in the clause - use default value 1.
2691   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2692                                             : CGF.Builder.getIntN(IVSize, 1);
2693   llvm::Value *Args[] = {
2694       emitUpdateLocation(CGF, Loc),
2695       getThreadID(CGF, Loc),
2696       CGF.Builder.getInt32(addMonoNonMonoModifier(
2697           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2698       DispatchValues.LB,                                     // Lower
2699       DispatchValues.UB,                                     // Upper
2700       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2701       Chunk                                                  // Chunk
2702   };
2703   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2704 }
2705 
2706 static void emitForStaticInitCall(
2707     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2708     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2709     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2710     const CGOpenMPRuntime::StaticRTInput &Values) {
2711   if (!CGF.HaveInsertPoint())
2712     return;
2713 
2714   assert(!Values.Ordered);
2715   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2716          Schedule == OMP_sch_static_balanced_chunked ||
2717          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2718          Schedule == OMP_dist_sch_static ||
2719          Schedule == OMP_dist_sch_static_chunked);
2720 
2721   // Call __kmpc_for_static_init(
2722   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2723   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2724   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2725   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2726   llvm::Value *Chunk = Values.Chunk;
2727   if (Chunk == nullptr) {
2728     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2729             Schedule == OMP_dist_sch_static) &&
2730            "expected static non-chunked schedule");
2731     // If the Chunk was not specified in the clause - use default value 1.
2732     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2733   } else {
2734     assert((Schedule == OMP_sch_static_chunked ||
2735             Schedule == OMP_sch_static_balanced_chunked ||
2736             Schedule == OMP_ord_static_chunked ||
2737             Schedule == OMP_dist_sch_static_chunked) &&
2738            "expected static chunked schedule");
2739   }
2740   llvm::Value *Args[] = {
2741       UpdateLocation,
2742       ThreadId,
2743       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2744                                                   M2)), // Schedule type
2745       Values.IL.getPointer(),                           // &isLastIter
2746       Values.LB.getPointer(),                           // &LB
2747       Values.UB.getPointer(),                           // &UB
2748       Values.ST.getPointer(),                           // &Stride
2749       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2750       Chunk                                             // Chunk
2751   };
2752   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2753 }
2754 
2755 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2756                                         SourceLocation Loc,
2757                                         OpenMPDirectiveKind DKind,
2758                                         const OpenMPScheduleTy &ScheduleKind,
2759                                         const StaticRTInput &Values) {
2760   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2761       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2762   assert(isOpenMPWorksharingDirective(DKind) &&
2763          "Expected loop-based or sections-based directive.");
2764   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2765                                              isOpenMPLoopDirective(DKind)
2766                                                  ? OMP_IDENT_WORK_LOOP
2767                                                  : OMP_IDENT_WORK_SECTIONS);
2768   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2769   llvm::FunctionCallee StaticInitFunction =
2770       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2771   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2772   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2773                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2774 }
2775 
2776 void CGOpenMPRuntime::emitDistributeStaticInit(
2777     CodeGenFunction &CGF, SourceLocation Loc,
2778     OpenMPDistScheduleClauseKind SchedKind,
2779     const CGOpenMPRuntime::StaticRTInput &Values) {
2780   OpenMPSchedType ScheduleNum =
2781       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2782   llvm::Value *UpdatedLocation =
2783       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2784   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2785   llvm::FunctionCallee StaticInitFunction =
2786       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2787   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2788                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2789                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2790 }
2791 
2792 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2793                                           SourceLocation Loc,
2794                                           OpenMPDirectiveKind DKind) {
2795   if (!CGF.HaveInsertPoint())
2796     return;
2797   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2798   llvm::Value *Args[] = {
2799       emitUpdateLocation(CGF, Loc,
2800                          isOpenMPDistributeDirective(DKind)
2801                              ? OMP_IDENT_WORK_DISTRIBUTE
2802                              : isOpenMPLoopDirective(DKind)
2803                                    ? OMP_IDENT_WORK_LOOP
2804                                    : OMP_IDENT_WORK_SECTIONS),
2805       getThreadID(CGF, Loc)};
2806   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2807   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2808                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2809                       Args);
2810 }
2811 
2812 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2813                                                  SourceLocation Loc,
2814                                                  unsigned IVSize,
2815                                                  bool IVSigned) {
2816   if (!CGF.HaveInsertPoint())
2817     return;
2818   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2819   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2820   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2821 }
2822 
2823 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2824                                           SourceLocation Loc, unsigned IVSize,
2825                                           bool IVSigned, Address IL,
2826                                           Address LB, Address UB,
2827                                           Address ST) {
2828   // Call __kmpc_dispatch_next(
2829   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2830   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2831   //          kmp_int[32|64] *p_stride);
2832   llvm::Value *Args[] = {
2833       emitUpdateLocation(CGF, Loc),
2834       getThreadID(CGF, Loc),
2835       IL.getPointer(), // &isLastIter
2836       LB.getPointer(), // &Lower
2837       UB.getPointer(), // &Upper
2838       ST.getPointer()  // &Stride
2839   };
2840   llvm::Value *Call =
2841       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2842   return CGF.EmitScalarConversion(
2843       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2844       CGF.getContext().BoolTy, Loc);
2845 }
2846 
2847 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2848                                            llvm::Value *NumThreads,
2849                                            SourceLocation Loc) {
2850   if (!CGF.HaveInsertPoint())
2851     return;
2852   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2853   llvm::Value *Args[] = {
2854       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2855       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2856   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2857                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2858                       Args);
2859 }
2860 
2861 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2862                                          ProcBindKind ProcBind,
2863                                          SourceLocation Loc) {
2864   if (!CGF.HaveInsertPoint())
2865     return;
2866   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2867   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2868   llvm::Value *Args[] = {
2869       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2870       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2871   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2872                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2873                       Args);
2874 }
2875 
2876 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2877                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2878   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2879     OMPBuilder.CreateFlush(CGF.Builder);
2880   } else {
2881     if (!CGF.HaveInsertPoint())
2882       return;
2883     // Build call void __kmpc_flush(ident_t *loc)
2884     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885                             CGM.getModule(), OMPRTL___kmpc_flush),
2886                         emitUpdateLocation(CGF, Loc));
2887   }
2888 }
2889 
namespace {
/// Field positions inside the kmp_task_t record. The values are consecutive
/// and start at zero, in declaration order.
enum KmpTaskTFields {
  /// Pointer to the list of shared variables.
  KmpTaskTShareds = 0,
  /// Entry point of the task.
  KmpTaskTRoutine = 1,
  /// Partition id, used by untied tasks.
  KmpTaskTPartId = 2,
  /// Function that calls the destructors of the private variables.
  Data1 = 3,
  /// Priority of the task.
  Data2 = 4,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound = 5,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound = 6,
  /// Stride (taskloops only).
  KmpTaskTStride = 7,
  /// "Is last iteration" flag (taskloops only).
  KmpTaskTLastIter = 8,
  /// Reduction data (taskloops only).
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2915 
2916 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2917   return OffloadEntriesTargetRegion.empty() &&
2918          OffloadEntriesDeviceGlobalVar.empty();
2919 }
2920 
2921 /// Initialize target region entry.
2922 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2923     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2924                                     StringRef ParentName, unsigned LineNum,
2925                                     unsigned Order) {
2926   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2927                                              "only required for the device "
2928                                              "code generation.");
2929   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2930       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2931                                    OMPTargetRegionEntryTargetRegion);
2932   ++OffloadingEntriesNum;
2933 }
2934 
2935 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2936     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2937                                   StringRef ParentName, unsigned LineNum,
2938                                   llvm::Constant *Addr, llvm::Constant *ID,
2939                                   OMPTargetRegionEntryKind Flags) {
2940   // If we are emitting code for a target, the entry is already initialized,
2941   // only has to be registered.
2942   if (CGM.getLangOpts().OpenMPIsDevice) {
2943     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2944       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2945           DiagnosticsEngine::Error,
2946           "Unable to find target region on line '%0' in the device code.");
2947       CGM.getDiags().Report(DiagID) << LineNum;
2948       return;
2949     }
2950     auto &Entry =
2951         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2952     assert(Entry.isValid() && "Entry not initialized!");
2953     Entry.setAddress(Addr);
2954     Entry.setID(ID);
2955     Entry.setFlags(Flags);
2956   } else {
2957     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2958     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2959     ++OffloadingEntriesNum;
2960   }
2961 }
2962 
2963 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2964     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2965     unsigned LineNum) const {
2966   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2967   if (PerDevice == OffloadEntriesTargetRegion.end())
2968     return false;
2969   auto PerFile = PerDevice->second.find(FileID);
2970   if (PerFile == PerDevice->second.end())
2971     return false;
2972   auto PerParentName = PerFile->second.find(ParentName);
2973   if (PerParentName == PerFile->second.end())
2974     return false;
2975   auto PerLine = PerParentName->second.find(LineNum);
2976   if (PerLine == PerParentName->second.end())
2977     return false;
2978   // Fail if this entry is already registered.
2979   if (PerLine->second.getAddress() || PerLine->second.getID())
2980     return false;
2981   return true;
2982 }
2983 
2984 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2985     const OffloadTargetRegionEntryInfoActTy &Action) {
2986   // Scan all target region entries and perform the provided action.
2987   for (const auto &D : OffloadEntriesTargetRegion)
2988     for (const auto &F : D.second)
2989       for (const auto &P : F.second)
2990         for (const auto &L : P.second)
2991           Action(D.first, F.first, P.first(), L.first, L.second);
2992 }
2993 
2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2995     initializeDeviceGlobalVarEntryInfo(StringRef Name,
2996                                        OMPTargetGlobalVarEntryKind Flags,
2997                                        unsigned Order) {
2998   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2999                                              "only required for the device "
3000                                              "code generation.");
3001   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3002   ++OffloadingEntriesNum;
3003 }
3004 
3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3006     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3007                                      CharUnits VarSize,
3008                                      OMPTargetGlobalVarEntryKind Flags,
3009                                      llvm::GlobalValue::LinkageTypes Linkage) {
3010   if (CGM.getLangOpts().OpenMPIsDevice) {
3011     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3012     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3013            "Entry not initialized!");
3014     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3015            "Resetting with the new address.");
3016     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3017       if (Entry.getVarSize().isZero()) {
3018         Entry.setVarSize(VarSize);
3019         Entry.setLinkage(Linkage);
3020       }
3021       return;
3022     }
3023     Entry.setVarSize(VarSize);
3024     Entry.setLinkage(Linkage);
3025     Entry.setAddress(Addr);
3026   } else {
3027     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3028       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3029       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3030              "Entry not initialized!");
3031       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3032              "Resetting with the new address.");
3033       if (Entry.getVarSize().isZero()) {
3034         Entry.setVarSize(VarSize);
3035         Entry.setLinkage(Linkage);
3036       }
3037       return;
3038     }
3039     OffloadEntriesDeviceGlobalVar.try_emplace(
3040         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3041     ++OffloadingEntriesNum;
3042   }
3043 }
3044 
3045 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3046     actOnDeviceGlobalVarEntriesInfo(
3047         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3048   // Scan all target region entries and perform the provided action.
3049   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3050     Action(E.getKey(), E.getValue());
3051 }
3052 
3053 void CGOpenMPRuntime::createOffloadEntry(
3054     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3055     llvm::GlobalValue::LinkageTypes Linkage) {
3056   StringRef Name = Addr->getName();
3057   llvm::Module &M = CGM.getModule();
3058   llvm::LLVMContext &C = M.getContext();
3059 
3060   // Create constant string with the name.
3061   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3062 
3063   std::string StringName = getName({"omp_offloading", "entry_name"});
3064   auto *Str = new llvm::GlobalVariable(
3065       M, StrPtrInit->getType(), /*isConstant=*/true,
3066       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3067   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3068 
3069   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3070                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3071                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3072                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3073                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3074   std::string EntryName = getName({"omp_offloading", "entry", ""});
3075   llvm::GlobalVariable *Entry = createGlobalStruct(
3076       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3077       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3078 
3079   // The entry has to be created in the section the linker expects it to be.
3080   Entry->setSection("omp_offloading_entries");
3081 }
3082 
3083 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3084   // Emit the offloading entries and metadata so that the device codegen side
3085   // can easily figure out what to emit. The produced metadata looks like
3086   // this:
3087   //
3088   // !omp_offload.info = !{!1, ...}
3089   //
3090   // Right now we only generate metadata for function that contain target
3091   // regions.
3092 
3093   // If we are in simd mode or there are no entries, we don't need to do
3094   // anything.
3095   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3096     return;
3097 
3098   llvm::Module &M = CGM.getModule();
3099   llvm::LLVMContext &C = M.getContext();
3100   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3101                          SourceLocation, StringRef>,
3102               16>
3103       OrderedEntries(OffloadEntriesInfoManager.size());
3104   llvm::SmallVector<StringRef, 16> ParentFunctions(
3105       OffloadEntriesInfoManager.size());
3106 
3107   // Auxiliary methods to create metadata values and strings.
3108   auto &&GetMDInt = [this](unsigned V) {
3109     return llvm::ConstantAsMetadata::get(
3110         llvm::ConstantInt::get(CGM.Int32Ty, V));
3111   };
3112 
3113   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3114 
3115   // Create the offloading info metadata node.
3116   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3117 
3118   // Create function that emits metadata for each target region entry;
3119   auto &&TargetRegionMetadataEmitter =
3120       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3121        &GetMDString](
3122           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3123           unsigned Line,
3124           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3125         // Generate metadata for target regions. Each entry of this metadata
3126         // contains:
3127         // - Entry 0 -> Kind of this type of metadata (0).
3128         // - Entry 1 -> Device ID of the file where the entry was identified.
3129         // - Entry 2 -> File ID of the file where the entry was identified.
3130         // - Entry 3 -> Mangled name of the function where the entry was
3131         // identified.
3132         // - Entry 4 -> Line in the file where the entry was identified.
3133         // - Entry 5 -> Order the entry was created.
3134         // The first element of the metadata node is the kind.
3135         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3136                                  GetMDInt(FileID),      GetMDString(ParentName),
3137                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3138 
3139         SourceLocation Loc;
3140         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3141                   E = CGM.getContext().getSourceManager().fileinfo_end();
3142              I != E; ++I) {
3143           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3144               I->getFirst()->getUniqueID().getFile() == FileID) {
3145             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3146                 I->getFirst(), Line, 1);
3147             break;
3148           }
3149         }
3150         // Save this entry in the right position of the ordered entries array.
3151         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3152         ParentFunctions[E.getOrder()] = ParentName;
3153 
3154         // Add metadata to the named metadata node.
3155         MD->addOperand(llvm::MDNode::get(C, Ops));
3156       };
3157 
3158   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3159       TargetRegionMetadataEmitter);
3160 
3161   // Create function that emits metadata for each device global variable entry;
3162   auto &&DeviceGlobalVarMetadataEmitter =
3163       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3164        MD](StringRef MangledName,
3165            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3166                &E) {
3167         // Generate metadata for global variables. Each entry of this metadata
3168         // contains:
3169         // - Entry 0 -> Kind of this type of metadata (1).
3170         // - Entry 1 -> Mangled name of the variable.
3171         // - Entry 2 -> Declare target kind.
3172         // - Entry 3 -> Order the entry was created.
3173         // The first element of the metadata node is the kind.
3174         llvm::Metadata *Ops[] = {
3175             GetMDInt(E.getKind()), GetMDString(MangledName),
3176             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3177 
3178         // Save this entry in the right position of the ordered entries array.
3179         OrderedEntries[E.getOrder()] =
3180             std::make_tuple(&E, SourceLocation(), MangledName);
3181 
3182         // Add metadata to the named metadata node.
3183         MD->addOperand(llvm::MDNode::get(C, Ops));
3184       };
3185 
3186   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3187       DeviceGlobalVarMetadataEmitter);
3188 
3189   for (const auto &E : OrderedEntries) {
3190     assert(std::get<0>(E) && "All ordered entries must exist!");
3191     if (const auto *CE =
3192             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3193                 std::get<0>(E))) {
3194       if (!CE->getID() || !CE->getAddress()) {
3195         // Do not blame the entry if the parent funtion is not emitted.
3196         StringRef FnName = ParentFunctions[CE->getOrder()];
3197         if (!CGM.GetGlobalValue(FnName))
3198           continue;
3199         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3200             DiagnosticsEngine::Error,
3201             "Offloading entry for target region in %0 is incorrect: either the "
3202             "address or the ID is invalid.");
3203         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3204         continue;
3205       }
3206       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3207                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3208     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3209                                              OffloadEntryInfoDeviceGlobalVar>(
3210                    std::get<0>(E))) {
3211       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3212           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3213               CE->getFlags());
3214       switch (Flags) {
3215       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3216         if (CGM.getLangOpts().OpenMPIsDevice &&
3217             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3218           continue;
3219         if (!CE->getAddress()) {
3220           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3221               DiagnosticsEngine::Error, "Offloading entry for declare target "
3222                                         "variable %0 is incorrect: the "
3223                                         "address is invalid.");
3224           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3225           continue;
3226         }
3227         // The vaiable has no definition - no need to add the entry.
3228         if (CE->getVarSize().isZero())
3229           continue;
3230         break;
3231       }
3232       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3233         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3234                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3235                "Declaret target link address is set.");
3236         if (CGM.getLangOpts().OpenMPIsDevice)
3237           continue;
3238         if (!CE->getAddress()) {
3239           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3240               DiagnosticsEngine::Error,
3241               "Offloading entry for declare target variable is incorrect: the "
3242               "address is invalid.");
3243           CGM.getDiags().Report(DiagID);
3244           continue;
3245         }
3246         break;
3247       }
3248       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3249                          CE->getVarSize().getQuantity(), Flags,
3250                          CE->getLinkage());
3251     } else {
3252       llvm_unreachable("Unsupported entry kind.");
3253     }
3254   }
3255 }
3256 
3257 /// Loads all the offload entries information from the host IR
3258 /// metadata.
3259 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3260   // If we are in target mode, load the metadata from the host IR. This code has
3261   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3262 
3263   if (!CGM.getLangOpts().OpenMPIsDevice)
3264     return;
3265 
3266   if (CGM.getLangOpts().OMPHostIRFile.empty())
3267     return;
3268 
3269   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3270   if (auto EC = Buf.getError()) {
3271     CGM.getDiags().Report(diag::err_cannot_open_file)
3272         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3273     return;
3274   }
3275 
3276   llvm::LLVMContext C;
3277   auto ME = expectedToErrorOrAndEmitErrors(
3278       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3279 
3280   if (auto EC = ME.getError()) {
3281     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3282         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3283     CGM.getDiags().Report(DiagID)
3284         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3285     return;
3286   }
3287 
3288   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3289   if (!MD)
3290     return;
3291 
3292   for (llvm::MDNode *MN : MD->operands()) {
3293     auto &&GetMDInt = [MN](unsigned Idx) {
3294       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3295       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3296     };
3297 
3298     auto &&GetMDString = [MN](unsigned Idx) {
3299       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3300       return V->getString();
3301     };
3302 
3303     switch (GetMDInt(0)) {
3304     default:
3305       llvm_unreachable("Unexpected metadata!");
3306       break;
3307     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3308         OffloadingEntryInfoTargetRegion:
3309       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3310           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3311           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3312           /*Order=*/GetMDInt(5));
3313       break;
3314     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3315         OffloadingEntryInfoDeviceGlobalVar:
3316       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3317           /*MangledName=*/GetMDString(1),
3318           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3319               /*Flags=*/GetMDInt(2)),
3320           /*Order=*/GetMDInt(3));
3321       break;
3322     }
3323   }
3324 }
3325 
3326 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3327   if (!KmpRoutineEntryPtrTy) {
3328     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3329     ASTContext &C = CGM.getContext();
3330     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3331     FunctionProtoType::ExtProtoInfo EPI;
3332     KmpRoutineEntryPtrQTy = C.getPointerType(
3333         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3334     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3335   }
3336 }
3337 
3338 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3339   // Make sure the type of the entry is already created. This is the type we
3340   // have to create:
3341   // struct __tgt_offload_entry{
3342   //   void      *addr;       // Pointer to the offload entry info.
3343   //                          // (function or global)
3344   //   char      *name;       // Name of the function or global.
3345   //   size_t     size;       // Size of the entry info (0 if it a function).
3346   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3347   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3348   // };
3349   if (TgtOffloadEntryQTy.isNull()) {
3350     ASTContext &C = CGM.getContext();
3351     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3352     RD->startDefinition();
3353     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3354     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3355     addFieldToRecordDecl(C, RD, C.getSizeType());
3356     addFieldToRecordDecl(
3357         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3358     addFieldToRecordDecl(
3359         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3360     RD->completeDefinition();
3361     RD->addAttr(PackedAttr::CreateImplicit(C));
3362     TgtOffloadEntryQTy = C.getRecordType(RD);
3363   }
3364   return TgtOffloadEntryQTy;
3365 }
3366 
3367 namespace {
3368 struct PrivateHelpersTy {
3369   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3370                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3371       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3372         PrivateElemInit(PrivateElemInit) {}
3373   const Expr *OriginalRef = nullptr;
3374   const VarDecl *Original = nullptr;
3375   const VarDecl *PrivateCopy = nullptr;
3376   const VarDecl *PrivateElemInit = nullptr;
3377 };
3378 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3379 } // anonymous namespace
3380 
3381 static RecordDecl *
3382 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3383   if (!Privates.empty()) {
3384     ASTContext &C = CGM.getContext();
3385     // Build struct .kmp_privates_t. {
3386     //         /*  private vars  */
3387     //       };
3388     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3389     RD->startDefinition();
3390     for (const auto &Pair : Privates) {
3391       const VarDecl *VD = Pair.second.Original;
3392       QualType Type = VD->getType().getNonReferenceType();
3393       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3394       if (VD->hasAttrs()) {
3395         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3396              E(VD->getAttrs().end());
3397              I != E; ++I)
3398           FD->addAttr(*I);
3399       }
3400     }
3401     RD->completeDefinition();
3402     return RD;
3403   }
3404   return nullptr;
3405 }
3406 
3407 static RecordDecl *
3408 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3409                          QualType KmpInt32Ty,
3410                          QualType KmpRoutineEntryPointerQTy) {
3411   ASTContext &C = CGM.getContext();
3412   // Build struct kmp_task_t {
3413   //         void *              shareds;
3414   //         kmp_routine_entry_t routine;
3415   //         kmp_int32           part_id;
3416   //         kmp_cmplrdata_t data1;
3417   //         kmp_cmplrdata_t data2;
3418   // For taskloops additional fields:
3419   //         kmp_uint64          lb;
3420   //         kmp_uint64          ub;
3421   //         kmp_int64           st;
3422   //         kmp_int32           liter;
3423   //         void *              reductions;
3424   //       };
3425   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3426   UD->startDefinition();
3427   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3428   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3429   UD->completeDefinition();
3430   QualType KmpCmplrdataTy = C.getRecordType(UD);
3431   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3432   RD->startDefinition();
3433   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3434   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3435   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3436   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3437   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3438   if (isOpenMPTaskLoopDirective(Kind)) {
3439     QualType KmpUInt64Ty =
3440         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3441     QualType KmpInt64Ty =
3442         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3443     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3444     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3445     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3446     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3447     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3448   }
3449   RD->completeDefinition();
3450   return RD;
3451 }
3452 
3453 static RecordDecl *
3454 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3455                                      ArrayRef<PrivateDataTy> Privates) {
3456   ASTContext &C = CGM.getContext();
3457   // Build struct kmp_task_t_with_privates {
3458   //         kmp_task_t task_data;
3459   //         .kmp_privates_t. privates;
3460   //       };
3461   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3462   RD->startDefinition();
3463   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3464   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3465     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3466   RD->completeDefinition();
3467   return RD;
3468 }
3469 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument and forwards the task data to the outlined task function.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// If the task record has no privates field, a null pointer is passed in
/// place of &tt->privates.
/// \param Kind Directive kind; taskloop kinds pass the extra loop arguments.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type wrapping kmp_task_t and privates.
/// \param KmpTaskTQTy Record type of kmp_task_t.
/// \param SharedsPtrTy Type the loaded 'shareds' pointer is cast to.
/// \param TaskFunction Outlined function that is called by the proxy.
/// \param TaskPrivatesMap Value forwarded as the privates mapping argument.
/// \return The emitted proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt; Base is its first field (tt->task_data).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record;
  // pass its address as void*, or null when the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to all task kinds; the last one is tt itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions by value.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the trailing argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3584 
3585 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3586                                             SourceLocation Loc,
3587                                             QualType KmpInt32Ty,
3588                                             QualType KmpTaskTWithPrivatesPtrQTy,
3589                                             QualType KmpTaskTWithPrivatesQTy) {
3590   ASTContext &C = CGM.getContext();
3591   FunctionArgList Args;
3592   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3593                             ImplicitParamDecl::Other);
3594   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3595                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3596                                 ImplicitParamDecl::Other);
3597   Args.push_back(&GtidArg);
3598   Args.push_back(&TaskTypeArg);
3599   const auto &DestructorFnInfo =
3600       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3601   llvm::FunctionType *DestructorFnTy =
3602       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3603   std::string Name =
3604       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3605   auto *DestructorFn =
3606       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3607                              Name, &CGM.getModule());
3608   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3609                                     DestructorFnInfo);
3610   DestructorFn->setDoesNotRecurse();
3611   CodeGenFunction CGF(CGM);
3612   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3613                     Args, Loc, Loc);
3614 
3615   LValue Base = CGF.EmitLoadOfPointerLValue(
3616       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3617       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3618   const auto *KmpTaskTWithPrivatesQTyRD =
3619       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3620   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3621   Base = CGF.EmitLValueForField(Base, *FI);
3622   for (const auto *Field :
3623        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3624     if (QualType::DestructionKind DtorKind =
3625             Field->getType().isDestructedType()) {
3626       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3627       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3628     }
3629   }
3630   CGF.FinishFunction();
3631   return DestructorFn;
3632 }
3633 
3634 /// Emit a privates mapping function for correct handling of private and
3635 /// firstprivate variables.
3636 /// \code
3637 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3638 /// **noalias priv1,...,  <tyn> **noalias privn) {
3639 ///   *priv1 = &.privates.priv1;
3640 ///   ...;
3641 ///   *privn = &.privates.privn;
3642 /// }
3643 /// \endcode
3644 static llvm::Value *
3645 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3646                                ArrayRef<const Expr *> PrivateVars,
3647                                ArrayRef<const Expr *> FirstprivateVars,
3648                                ArrayRef<const Expr *> LastprivateVars,
3649                                QualType PrivatesQTy,
3650                                ArrayRef<PrivateDataTy> Privates) {
3651   ASTContext &C = CGM.getContext();
3652   FunctionArgList Args;
3653   ImplicitParamDecl TaskPrivatesArg(
3654       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3655       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3656       ImplicitParamDecl::Other);
3657   Args.push_back(&TaskPrivatesArg);
3658   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3659   unsigned Counter = 1;
3660   for (const Expr *E : PrivateVars) {
3661     Args.push_back(ImplicitParamDecl::Create(
3662         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3663         C.getPointerType(C.getPointerType(E->getType()))
3664             .withConst()
3665             .withRestrict(),
3666         ImplicitParamDecl::Other));
3667     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3668     PrivateVarsPos[VD] = Counter;
3669     ++Counter;
3670   }
3671   for (const Expr *E : FirstprivateVars) {
3672     Args.push_back(ImplicitParamDecl::Create(
3673         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3674         C.getPointerType(C.getPointerType(E->getType()))
3675             .withConst()
3676             .withRestrict(),
3677         ImplicitParamDecl::Other));
3678     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3679     PrivateVarsPos[VD] = Counter;
3680     ++Counter;
3681   }
3682   for (const Expr *E : LastprivateVars) {
3683     Args.push_back(ImplicitParamDecl::Create(
3684         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685         C.getPointerType(C.getPointerType(E->getType()))
3686             .withConst()
3687             .withRestrict(),
3688         ImplicitParamDecl::Other));
3689     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3690     PrivateVarsPos[VD] = Counter;
3691     ++Counter;
3692   }
3693   const auto &TaskPrivatesMapFnInfo =
3694       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3695   llvm::FunctionType *TaskPrivatesMapTy =
3696       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3697   std::string Name =
3698       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3699   auto *TaskPrivatesMap = llvm::Function::Create(
3700       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3701       &CGM.getModule());
3702   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3703                                     TaskPrivatesMapFnInfo);
3704   if (CGM.getLangOpts().Optimize) {
3705     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3706     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3707     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3708   }
3709   CodeGenFunction CGF(CGM);
3710   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3711                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3712 
3713   // *privi = &.privates.privi;
3714   LValue Base = CGF.EmitLoadOfPointerLValue(
3715       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3716       TaskPrivatesArg.getType()->castAs<PointerType>());
3717   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3718   Counter = 0;
3719   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3720     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3721     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3722     LValue RefLVal =
3723         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3724     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3725         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3726     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3727     ++Counter;
3728   }
3729   CGF.FinishFunction();
3730   return TaskPrivatesMap;
3731 }
3732 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field.
/// \param KmpTaskSharedsPtr Address of the shareds block used as the source
/// for firstprivate copies; may be invalid.
/// \param TDBase LValue of the kmp_task_t_with_privates object being
/// initialized.
/// \param ForDup True when emitting into the task duplication function; in
/// that mode only non-trivial constructor initializers are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor calls are
    // emitted; otherwise every private copy with an initializer is
    // initialized.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads the original
      // variable through that helper, so the original's address is needed.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the shareds block of the source task,
          // using the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code inside a block:
          // emit the original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the original's address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not reference the original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next field even when nothing was emitted, to stay in
    // step with Privates.
    ++FI;
  }
}
3849 
3850 /// Check if duplication function is required for taskloops.
3851 static bool checkInitIsRequired(CodeGenFunction &CGF,
3852                                 ArrayRef<PrivateDataTy> Privates) {
3853   bool InitRequired = false;
3854   for (const PrivateDataTy &Pair : Privates) {
3855     const VarDecl *VD = Pair.second.PrivateCopy;
3856     const Expr *Init = VD->getAnyInitializer();
3857     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3858                                     !CGF.isTrivialInitializer(Init));
3859     if (InitRequired)
3860       break;
3861   }
3862   return InitRequired;
3863 }
3864 
3865 
3866 /// Emit task_dup function (for initialization of
3867 /// private/firstprivate/lastprivate vars and last_iter flag)
3868 /// \code
3869 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3870 /// lastpriv) {
3871 /// // setup lastprivate flag
3872 ///    task_dst->last = lastpriv;
3873 /// // could be constructor calls here...
3874 /// }
3875 /// \endcode
3876 static llvm::Value *
3877 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3878                     const OMPExecutableDirective &D,
3879                     QualType KmpTaskTWithPrivatesPtrQTy,
3880                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3881                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3882                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3883                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3884   ASTContext &C = CGM.getContext();
3885   FunctionArgList Args;
3886   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3887                            KmpTaskTWithPrivatesPtrQTy,
3888                            ImplicitParamDecl::Other);
3889   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3890                            KmpTaskTWithPrivatesPtrQTy,
3891                            ImplicitParamDecl::Other);
3892   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3893                                 ImplicitParamDecl::Other);
3894   Args.push_back(&DstArg);
3895   Args.push_back(&SrcArg);
3896   Args.push_back(&LastprivArg);
3897   const auto &TaskDupFnInfo =
3898       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3899   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3900   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3901   auto *TaskDup = llvm::Function::Create(
3902       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3903   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3904   TaskDup->setDoesNotRecurse();
3905   CodeGenFunction CGF(CGM);
3906   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3907                     Loc);
3908 
3909   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3910       CGF.GetAddrOfLocalVar(&DstArg),
3911       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3912   // task_dst->liter = lastpriv;
3913   if (WithLastIter) {
3914     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3915     LValue Base = CGF.EmitLValueForField(
3916         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3917     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3918     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3919         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3920     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3921   }
3922 
3923   // Emit initial values for private copies (if any).
3924   assert(!Privates.empty());
3925   Address KmpTaskSharedsPtr = Address::invalid();
3926   if (!Data.FirstprivateVars.empty()) {
3927     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3928         CGF.GetAddrOfLocalVar(&SrcArg),
3929         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3930     LValue Base = CGF.EmitLValueForField(
3931         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3932     KmpTaskSharedsPtr = Address(
3933         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3934                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3935                                                   KmpTaskTShareds)),
3936                              Loc),
3937         CGM.getNaturalTypeAlignment(SharedsTy));
3938   }
3939   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3940                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3941   CGF.FinishFunction();
3942   return TaskDup;
3943 }
3944 
3945 /// Checks if destructor function is required to be generated.
3946 /// \return true if cleanups are required, false otherwise.
3947 static bool
3948 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3949   bool NeedsCleanup = false;
3950   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3951   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3952   for (const FieldDecl *FD : PrivateRD->fields()) {
3953     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3954     if (NeedsCleanup)
3955       break;
3956   }
3957   return NeedsCleanup;
3958 }
3959 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression, the loop
/// header (counter initialization, condition check and iterator update); the
/// destructor emits the matching counter increments, back-branches and exit
/// blocks in reverse order. Code emitted while the scope is alive therefore
/// ends up inside the innermost loop body. A null expression makes the scope
/// a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator destinations of the loop condition blocks ("iter.cont").
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator destinations of the loop exit blocks ("iter.exit").
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatizes the iterator and counter variables of \p E and emits the
  /// headers of the loop nest. Does nothing if \p E is null.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds first and register temporaries for the
    // iterator variables and their counters.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison is signed or unsigned depending on the counter type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4038 
4039 static std::pair<llvm::Value *, llvm::Value *>
4040 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4041   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4042   llvm::Value *Addr;
4043   if (OASE) {
4044     const Expr *Base = OASE->getBase();
4045     Addr = CGF.EmitScalarExpr(Base);
4046   } else {
4047     Addr = CGF.EmitLValue(E).getPointer(CGF);
4048   }
4049   llvm::Value *SizeVal;
4050   QualType Ty = E->getType();
4051   if (OASE) {
4052     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4053     for (const Expr *SE : OASE->getDimensions()) {
4054       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4055       Sz = CGF.EmitScalarConversion(
4056           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4057       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4058     }
4059   } else if (const auto *ASE =
4060                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4061     LValue UpAddrLVal =
4062         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4063     llvm::Value *UpAddr =
4064         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4065     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4066     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4067     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4068   } else {
4069     SizeVal = CGF.getTypeSize(Ty);
4070   }
4071   return std::make_pair(Addr, SizeVal);
4072 }
4073 
4074 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4075 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4076   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4077   if (KmpTaskAffinityInfoTy.isNull()) {
4078     RecordDecl *KmpAffinityInfoRD =
4079         C.buildImplicitRecord("kmp_task_affinity_info_t");
4080     KmpAffinityInfoRD->startDefinition();
4081     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4082     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4083     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4084     KmpAffinityInfoRD->completeDefinition();
4085     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4086   }
4087 }
4088 
/// Emits allocation and initialization of the task descriptor for the
/// task-generating directive \p D.
///
/// In order, the function:
///  - gathers the private, firstprivate and lastprivate copies from \p Data and
///    stable-sorts them by decreasing declaration alignment;
///  - builds (or reuses the cached) kmp_task_t record type and builds the
///    per-task "kmp_task_t with privates" record type;
///  - emits the privates mapping function (only when there are privates) and
///    the proxy task entry function;
///  - calls __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc with an
///    extra device id argument when \p D has a 'nowait' clause;
///  - handles the 'detach' and 'affinity' clauses of \p D;
///  - copies \p Shareds into the new task (if \p SharedsTy has fields),
///    initializes the private copies, and stores the destructors function and
///    the priority into the task descriptor when they are required.
///
/// \param CGF Function into which the code is emitted.
/// \param Loc Source location used for the emitted runtime calls.
/// \param D Directive that creates the task.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the captured shareds.
/// \param Shareds Address the shareds are copied from.
/// \param Data Task data (private copies, tied/final/priority information).
/// \returns The allocated task, the proxy task entry, the task pointer cast to
/// the "with privates" type, the lvalue of the embedded kmp_task_t, its record
/// declaration and, for taskloop directives that need it, the task
/// duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Larger alignment first; the stable sort keeps the insertion order among
  // entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached
  // kmp_task_t record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The pointer type of the privates map is taken from the fourth parameter
  // of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The privates record is the second field of the "with privates" record.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select the final flag at run time;
  // otherwise use its compile-time integer part.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry is used; it
  // receives the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // NOTE: this 'Loc' is the emitted location value for the clause and
    // shadows the SourceLocation parameter inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    // NOTE: shadows the function-level 'C' (the same context); the clause
    // loop variables named 'C' below shadow it in turn.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements stays null unless some clause has an iterator modifier;
    // NumAffinities counts the items of clauses without a modifier.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = items without iterators + iterator count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The count is handed to the runtime call below as a 32-bit integer.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // NOTE(review): only the base_addr and len fields are stored in the two
    // fill loops below; the flags field is not written.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced under iterators are placed after the 'Pos' elements
    // filled above, using a position counter kept in memory.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the in-memory position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task "with privates" record; its first
  // field is the kmp_task_t descriptor itself.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the pointer loaded from the shareds field of the
    // new task descriptor.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4471 
namespace {
/// Dependence kinds as encoded for the runtime library; these values are
/// stored into the flags field of a kmp_depend_info element.
enum RTLDependenceKindTy {
  /// Used for 'in' dependences.
  DepIn = 0x1,
  /// Used for both 'out' and 'inout' dependences.
  DepInOut = 0x3,
  /// Used for 'mutexinoutset' dependences.
  DepMutexInOutSet = 0x4,
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4482 
4483 /// Translates internal dependency kind into the runtime kind.
4484 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4485   RTLDependenceKindTy DepKind;
4486   switch (K) {
4487   case OMPC_DEPEND_in:
4488     DepKind = DepIn;
4489     break;
4490   // Out and InOut dependencies must use the same code.
4491   case OMPC_DEPEND_out:
4492   case OMPC_DEPEND_inout:
4493     DepKind = DepInOut;
4494     break;
4495   case OMPC_DEPEND_mutexinoutset:
4496     DepKind = DepMutexInOutSet;
4497     break;
4498   case OMPC_DEPEND_source:
4499   case OMPC_DEPEND_sink:
4500   case OMPC_DEPEND_depobj:
4501   case OMPC_DEPEND_unknown:
4502     llvm_unreachable("Unknown task dependence type");
4503   }
4504   return DepKind;
4505 }
4506 
4507 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4508 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4509                            QualType &FlagsTy) {
4510   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4511   if (KmpDependInfoTy.isNull()) {
4512     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4513     KmpDependInfoRD->startDefinition();
4514     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4515     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4516     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4517     KmpDependInfoRD->completeDefinition();
4518     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4519   }
4520 }
4521 
4522 std::pair<llvm::Value *, LValue>
4523 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4524                                    SourceLocation Loc) {
4525   ASTContext &C = CGM.getContext();
4526   QualType FlagsTy;
4527   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4528   RecordDecl *KmpDependInfoRD =
4529       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4530   LValue Base = CGF.EmitLoadOfPointerLValue(
4531       DepobjLVal.getAddress(CGF),
4532       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4533   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4534   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4535           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4536   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4537                             Base.getTBAAInfo());
4538   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4539       Addr.getPointer(),
4540       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4541   LValue NumDepsBase = CGF.MakeAddrLValue(
4542       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4543       Base.getBaseInfo(), Base.getTBAAInfo());
4544   // NumDeps = deps[i].base_addr;
4545   LValue BaseAddrLVal = CGF.EmitLValueForField(
4546       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4547   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4548   return std::make_pair(NumDeps, Base);
4549 }
4550 
4551 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4552                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4553                            const OMPTaskDataTy::DependData &Data,
4554                            Address DependenciesArray) {
4555   CodeGenModule &CGM = CGF.CGM;
4556   ASTContext &C = CGM.getContext();
4557   QualType FlagsTy;
4558   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4559   RecordDecl *KmpDependInfoRD =
4560       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4561   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4562 
4563   OMPIteratorGeneratorScope IteratorScope(
4564       CGF, cast_or_null<OMPIteratorExpr>(
4565                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4566                                  : nullptr));
4567   for (const Expr *E : Data.DepExprs) {
4568     llvm::Value *Addr;
4569     llvm::Value *Size;
4570     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4571     LValue Base;
4572     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4573       Base = CGF.MakeAddrLValue(
4574           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4575     } else {
4576       LValue &PosLVal = *Pos.get<LValue *>();
4577       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4578       Base = CGF.MakeAddrLValue(
4579           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4580                   DependenciesArray.getAlignment()),
4581           KmpDependInfoTy);
4582     }
4583     // deps[i].base_addr = &<Dependencies[i].second>;
4584     LValue BaseAddrLVal = CGF.EmitLValueForField(
4585         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4586     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4587                           BaseAddrLVal);
4588     // deps[i].len = sizeof(<Dependencies[i].second>);
4589     LValue LenLVal = CGF.EmitLValueForField(
4590         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4591     CGF.EmitStoreOfScalar(Size, LenLVal);
4592     // deps[i].flags = <Dependencies[i].first>;
4593     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4594     LValue FlagsLVal = CGF.EmitLValueForField(
4595         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4596     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4597                           FlagsLVal);
4598     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4599       ++(*P);
4600     } else {
4601       LValue &PosLVal = *Pos.get<LValue *>();
4602       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4603       Idx = CGF.Builder.CreateNUWAdd(Idx,
4604                                      llvm::ConstantInt::get(Idx->getType(), 1));
4605       CGF.EmitStoreOfScalar(Idx, PosLVal);
4606     }
4607   }
4608 }
4609 
4610 static SmallVector<llvm::Value *, 4>
4611 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4612                         const OMPTaskDataTy::DependData &Data) {
4613   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4614          "Expected depobj dependecy kind.");
4615   SmallVector<llvm::Value *, 4> Sizes;
4616   SmallVector<LValue, 4> SizeLVals;
4617   ASTContext &C = CGF.getContext();
4618   QualType FlagsTy;
4619   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4620   RecordDecl *KmpDependInfoRD =
4621       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4622   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4623   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4624   {
4625     OMPIteratorGeneratorScope IteratorScope(
4626         CGF, cast_or_null<OMPIteratorExpr>(
4627                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4628                                    : nullptr));
4629     for (const Expr *E : Data.DepExprs) {
4630       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4631       LValue Base = CGF.EmitLoadOfPointerLValue(
4632           DepobjLVal.getAddress(CGF),
4633           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4634       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4635           Base.getAddress(CGF), KmpDependInfoPtrT);
4636       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4637                                 Base.getTBAAInfo());
4638       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4639           Addr.getPointer(),
4640           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4641       LValue NumDepsBase = CGF.MakeAddrLValue(
4642           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4643           Base.getBaseInfo(), Base.getTBAAInfo());
4644       // NumDeps = deps[i].base_addr;
4645       LValue BaseAddrLVal = CGF.EmitLValueForField(
4646           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4647       llvm::Value *NumDeps =
4648           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4649       LValue NumLVal = CGF.MakeAddrLValue(
4650           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4651           C.getUIntPtrType());
4652       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4653                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4654       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4655       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4656       CGF.EmitStoreOfScalar(Add, NumLVal);
4657       SizeLVals.push_back(NumLVal);
4658     }
4659   }
4660   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4661     llvm::Value *Size =
4662         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4663     Sizes.push_back(Size);
4664   }
4665   return Sizes;
4666 }
4667 
4668 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4669                                LValue PosLVal,
4670                                const OMPTaskDataTy::DependData &Data,
4671                                Address DependenciesArray) {
4672   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4673          "Expected depobj dependecy kind.");
4674   ASTContext &C = CGF.getContext();
4675   QualType FlagsTy;
4676   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4677   RecordDecl *KmpDependInfoRD =
4678       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4679   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4680   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4681   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4682   {
4683     OMPIteratorGeneratorScope IteratorScope(
4684         CGF, cast_or_null<OMPIteratorExpr>(
4685                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4686                                    : nullptr));
4687     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4688       const Expr *E = Data.DepExprs[I];
4689       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4690       LValue Base = CGF.EmitLoadOfPointerLValue(
4691           DepobjLVal.getAddress(CGF),
4692           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4693       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4694           Base.getAddress(CGF), KmpDependInfoPtrT);
4695       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4696                                 Base.getTBAAInfo());
4697 
4698       // Get number of elements in a single depobj.
4699       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4700           Addr.getPointer(),
4701           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4702       LValue NumDepsBase = CGF.MakeAddrLValue(
4703           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4704           Base.getBaseInfo(), Base.getTBAAInfo());
4705       // NumDeps = deps[i].base_addr;
4706       LValue BaseAddrLVal = CGF.EmitLValueForField(
4707           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4708       llvm::Value *NumDeps =
4709           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4710 
4711       // memcopy dependency data.
4712       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4713           ElSize,
4714           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4715       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4716       Address DepAddr =
4717           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4718                   DependenciesArray.getAlignment());
4719       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4720 
4721       // Increase pos.
4722       // pos += size;
4723       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4724       CGF.EmitStoreOfScalar(Add, PosLVal);
4725     }
4726   }
4727 }
4728 
4729 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4730     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4731     SourceLocation Loc) {
4732   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4733         return D.DepExprs.empty();
4734       }))
4735     return std::make_pair(nullptr, Address::invalid());
4736   // Process list of dependencies.
4737   ASTContext &C = CGM.getContext();
4738   Address DependenciesArray = Address::invalid();
4739   llvm::Value *NumOfElements = nullptr;
4740   unsigned NumDependencies = std::accumulate(
4741       Dependencies.begin(), Dependencies.end(), 0,
4742       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4743         return D.DepKind == OMPC_DEPEND_depobj
4744                    ? V
4745                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4746       });
4747   QualType FlagsTy;
4748   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4749   bool HasDepobjDeps = false;
4750   bool HasRegularWithIterators = false;
4751   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4752   llvm::Value *NumOfRegularWithIterators =
4753       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4754   // Calculate number of depobj dependecies and regular deps with the iterators.
4755   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4756     if (D.DepKind == OMPC_DEPEND_depobj) {
4757       SmallVector<llvm::Value *, 4> Sizes =
4758           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4759       for (llvm::Value *Size : Sizes) {
4760         NumOfDepobjElements =
4761             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4762       }
4763       HasDepobjDeps = true;
4764       continue;
4765     }
4766     // Include number of iterations, if any.
4767     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4768       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4769         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4770         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4771         NumOfRegularWithIterators =
4772             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4773       }
4774       HasRegularWithIterators = true;
4775       continue;
4776     }
4777   }
4778 
4779   QualType KmpDependInfoArrayTy;
4780   if (HasDepobjDeps || HasRegularWithIterators) {
4781     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4782                                            /*isSigned=*/false);
4783     if (HasDepobjDeps) {
4784       NumOfElements =
4785           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4786     }
4787     if (HasRegularWithIterators) {
4788       NumOfElements =
4789           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4790     }
4791     OpaqueValueExpr OVE(Loc,
4792                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4793                         VK_RValue);
4794     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4795                                                   RValue::get(NumOfElements));
4796     KmpDependInfoArrayTy =
4797         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4798                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4799     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4800     // Properly emit variable-sized array.
4801     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4802                                          ImplicitParamDecl::Other);
4803     CGF.EmitVarDecl(*PD);
4804     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4805     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4806                                               /*isSigned=*/false);
4807   } else {
4808     KmpDependInfoArrayTy = C.getConstantArrayType(
4809         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4810         ArrayType::Normal, /*IndexTypeQuals=*/0);
4811     DependenciesArray =
4812         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4813     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4814     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4815                                            /*isSigned=*/false);
4816   }
4817   unsigned Pos = 0;
4818   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4819     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4820         Dependencies[I].IteratorExpr)
4821       continue;
4822     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4823                    DependenciesArray);
4824   }
4825   // Copy regular dependecies with iterators.
4826   LValue PosLVal = CGF.MakeAddrLValue(
4827       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4828   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4829   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4830     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4831         !Dependencies[I].IteratorExpr)
4832       continue;
4833     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4834                    DependenciesArray);
4835   }
4836   // Copy final depobj arrays without iterators.
4837   if (HasDepobjDeps) {
4838     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4839       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4840         continue;
4841       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4842                          DependenciesArray);
4843     }
4844   }
4845   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4846       DependenciesArray, CGF.VoidPtrTy);
4847   return std::make_pair(NumOfElements, DependenciesArray);
4848 }
4849 
4850 Address CGOpenMPRuntime::emitDepobjDependClause(
4851     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4852     SourceLocation Loc) {
4853   if (Dependencies.DepExprs.empty())
4854     return Address::invalid();
4855   // Process list of dependencies.
4856   ASTContext &C = CGM.getContext();
4857   Address DependenciesArray = Address::invalid();
4858   unsigned NumDependencies = Dependencies.DepExprs.size();
4859   QualType FlagsTy;
4860   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4861   RecordDecl *KmpDependInfoRD =
4862       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4863 
4864   llvm::Value *Size;
4865   // Define type kmp_depend_info[<Dependencies.size()>];
4866   // For depobj reserve one extra element to store the number of elements.
4867   // It is required to handle depobj(x) update(in) construct.
4868   // kmp_depend_info[<Dependencies.size()>] deps;
4869   llvm::Value *NumDepsVal;
4870   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4871   if (const auto *IE =
4872           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4873     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4874     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4875       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4876       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4877       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4878     }
4879     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4880                                     NumDepsVal);
4881     CharUnits SizeInBytes =
4882         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4883     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4884     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4885     NumDepsVal =
4886         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4887   } else {
4888     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4889         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4890         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4891     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4892     Size = CGM.getSize(Sz.alignTo(Align));
4893     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4894   }
4895   // Need to allocate on the dynamic memory.
4896   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4897   // Use default allocator.
4898   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4899   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4900 
4901   llvm::Value *Addr =
4902       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4903                               CGM.getModule(), OMPRTL___kmpc_alloc),
4904                           Args, ".dep.arr.addr");
4905   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4906       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4907   DependenciesArray = Address(Addr, Align);
4908   // Write number of elements in the first element of array for depobj.
4909   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4910   // deps[i].base_addr = NumDependencies;
4911   LValue BaseAddrLVal = CGF.EmitLValueForField(
4912       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4913   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4914   llvm::PointerUnion<unsigned *, LValue *> Pos;
4915   unsigned Idx = 1;
4916   LValue PosLVal;
4917   if (Dependencies.IteratorExpr) {
4918     PosLVal = CGF.MakeAddrLValue(
4919         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4920         C.getSizeType());
4921     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4922                           /*IsInit=*/true);
4923     Pos = &PosLVal;
4924   } else {
4925     Pos = &Idx;
4926   }
4927   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4928   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4929       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4930   return DependenciesArray;
4931 }
4932 
4933 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4934                                         SourceLocation Loc) {
4935   ASTContext &C = CGM.getContext();
4936   QualType FlagsTy;
4937   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4938   LValue Base = CGF.EmitLoadOfPointerLValue(
4939       DepobjLVal.getAddress(CGF),
4940       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4941   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4942   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4943       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4944   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4945       Addr.getPointer(),
4946       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4947   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4948                                                                CGF.VoidPtrTy);
4949   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4950   // Use default allocator.
4951   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4952   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4953 
4954   // _kmpc_free(gtid, addr, nullptr);
4955   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4956                                 CGM.getModule(), OMPRTL___kmpc_free),
4957                             Args);
4958 }
4959 
4960 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4961                                        OpenMPDependClauseKind NewDepKind,
4962                                        SourceLocation Loc) {
4963   ASTContext &C = CGM.getContext();
4964   QualType FlagsTy;
4965   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4966   RecordDecl *KmpDependInfoRD =
4967       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4968   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4969   llvm::Value *NumDeps;
4970   LValue Base;
4971   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4972 
4973   Address Begin = Base.getAddress(CGF);
4974   // Cast from pointer to array type to pointer to single element.
4975   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
4976   // The basic structure here is a while-do loop.
4977   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4978   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4979   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4980   CGF.EmitBlock(BodyBB);
4981   llvm::PHINode *ElementPHI =
4982       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4983   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4984   Begin = Address(ElementPHI, Begin.getAlignment());
4985   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4986                             Base.getTBAAInfo());
4987   // deps[i].flags = NewDepKind;
4988   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4989   LValue FlagsLVal = CGF.EmitLValueForField(
4990       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4991   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4992                         FlagsLVal);
4993 
4994   // Shift the address forward by one element.
4995   Address ElementNext =
4996       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4997   ElementPHI->addIncoming(ElementNext.getPointer(),
4998                           CGF.Builder.GetInsertBlock());
4999   llvm::Value *IsEmpty =
5000       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5001   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5002   // Done.
5003   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5004 }
5005 
5006 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5007                                    const OMPExecutableDirective &D,
5008                                    llvm::Function *TaskFunction,
5009                                    QualType SharedsTy, Address Shareds,
5010                                    const Expr *IfCond,
5011                                    const OMPTaskDataTy &Data) {
5012   if (!CGF.HaveInsertPoint())
5013     return;
5014 
5015   TaskResultTy Result =
5016       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5017   llvm::Value *NewTask = Result.NewTask;
5018   llvm::Function *TaskEntry = Result.TaskEntry;
5019   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5020   LValue TDBase = Result.TDBase;
5021   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5022   // Process list of dependences.
5023   Address DependenciesArray = Address::invalid();
5024   llvm::Value *NumOfElements;
5025   std::tie(NumOfElements, DependenciesArray) =
5026       emitDependClause(CGF, Data.Dependences, Loc);
5027 
5028   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5029   // libcall.
5030   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5031   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5032   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5033   // list is not empty
5034   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5035   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5036   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5037   llvm::Value *DepTaskArgs[7];
5038   if (!Data.Dependences.empty()) {
5039     DepTaskArgs[0] = UpLoc;
5040     DepTaskArgs[1] = ThreadID;
5041     DepTaskArgs[2] = NewTask;
5042     DepTaskArgs[3] = NumOfElements;
5043     DepTaskArgs[4] = DependenciesArray.getPointer();
5044     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5045     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5046   }
5047   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5048                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5049     if (!Data.Tied) {
5050       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5051       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5052       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5053     }
5054     if (!Data.Dependences.empty()) {
5055       CGF.EmitRuntimeCall(
5056           OMPBuilder.getOrCreateRuntimeFunction(
5057               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5058           DepTaskArgs);
5059     } else {
5060       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5061                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5062                           TaskArgs);
5063     }
5064     // Check if parent region is untied and build return for untied task;
5065     if (auto *Region =
5066             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5067       Region->emitUntiedSwitch(CGF);
5068   };
5069 
5070   llvm::Value *DepWaitTaskArgs[6];
5071   if (!Data.Dependences.empty()) {
5072     DepWaitTaskArgs[0] = UpLoc;
5073     DepWaitTaskArgs[1] = ThreadID;
5074     DepWaitTaskArgs[2] = NumOfElements;
5075     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5076     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5077     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5078   }
5079   auto &M = CGM.getModule();
5080   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5081                         TaskEntry, &Data, &DepWaitTaskArgs,
5082                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5083     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5084     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5085     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5086     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5087     // is specified.
5088     if (!Data.Dependences.empty())
5089       CGF.EmitRuntimeCall(
5090           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5091           DepWaitTaskArgs);
5092     // Call proxy_task_entry(gtid, new_task);
5093     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5094                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5095       Action.Enter(CGF);
5096       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5097       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5098                                                           OutlinedFnArgs);
5099     };
5100 
5101     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5102     // kmp_task_t *new_task);
5103     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5104     // kmp_task_t *new_task);
5105     RegionCodeGenTy RCG(CodeGen);
5106     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5107                               M, OMPRTL___kmpc_omp_task_begin_if0),
5108                           TaskArgs,
5109                           OMPBuilder.getOrCreateRuntimeFunction(
5110                               M, OMPRTL___kmpc_omp_task_complete_if0),
5111                           TaskArgs);
5112     RCG.setAction(Action);
5113     RCG(CGF);
5114   };
5115 
5116   if (IfCond) {
5117     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5118   } else {
5119     RegionCodeGenTy ThenRCG(ThenCodeGen);
5120     ThenRCG(CGF);
5121   }
5122 }
5123 
5124 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5125                                        const OMPLoopDirective &D,
5126                                        llvm::Function *TaskFunction,
5127                                        QualType SharedsTy, Address Shareds,
5128                                        const Expr *IfCond,
5129                                        const OMPTaskDataTy &Data) {
5130   if (!CGF.HaveInsertPoint())
5131     return;
5132   TaskResultTy Result =
5133       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5134   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5135   // libcall.
5136   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5137   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5138   // sched, kmp_uint64 grainsize, void *task_dup);
5139   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5140   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5141   llvm::Value *IfVal;
5142   if (IfCond) {
5143     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5144                                       /*isSigned=*/true);
5145   } else {
5146     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5147   }
5148 
5149   LValue LBLVal = CGF.EmitLValueForField(
5150       Result.TDBase,
5151       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5152   const auto *LBVar =
5153       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5154   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5155                        LBLVal.getQuals(),
5156                        /*IsInitializer=*/true);
5157   LValue UBLVal = CGF.EmitLValueForField(
5158       Result.TDBase,
5159       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5160   const auto *UBVar =
5161       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5162   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5163                        UBLVal.getQuals(),
5164                        /*IsInitializer=*/true);
5165   LValue StLVal = CGF.EmitLValueForField(
5166       Result.TDBase,
5167       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5168   const auto *StVar =
5169       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5170   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5171                        StLVal.getQuals(),
5172                        /*IsInitializer=*/true);
5173   // Store reductions address.
5174   LValue RedLVal = CGF.EmitLValueForField(
5175       Result.TDBase,
5176       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5177   if (Data.Reductions) {
5178     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5179   } else {
5180     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5181                                CGF.getContext().VoidPtrTy);
5182   }
5183   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5184   llvm::Value *TaskArgs[] = {
5185       UpLoc,
5186       ThreadID,
5187       Result.NewTask,
5188       IfVal,
5189       LBLVal.getPointer(CGF),
5190       UBLVal.getPointer(CGF),
5191       CGF.EmitLoadOfScalar(StLVal, Loc),
5192       llvm::ConstantInt::getSigned(
5193           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5194       llvm::ConstantInt::getSigned(
5195           CGF.IntTy, Data.Schedule.getPointer()
5196                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5197                          : NoSchedule),
5198       Data.Schedule.getPointer()
5199           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5200                                       /*isSigned=*/false)
5201           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5202       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5203                              Result.TaskDupFn, CGF.VoidPtrTy)
5204                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5205   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5206                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5207                       TaskArgs);
5208 }
5209 
5210 /// Emit reduction operation for each element of array (required for
5211 /// array sections) LHS op = RHS.
5212 /// \param Type Type of array.
5213 /// \param LHSVar Variable on the left side of the reduction operation
5214 /// (references element of array in original variable).
5215 /// \param RHSVar Variable on the right side of the reduction operation
5216 /// (references element of array in original variable).
5217 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5218 /// RHSVar.
5219 static void EmitOMPAggregateReduction(
5220     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5221     const VarDecl *RHSVar,
5222     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5223                                   const Expr *, const Expr *)> &RedOpGen,
5224     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5225     const Expr *UpExpr = nullptr) {
5226   // Perform element-by-element initialization.
5227   QualType ElementTy;
5228   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5229   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5230 
5231   // Drill down to the base element type on both arrays.
5232   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5233   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5234 
5235   llvm::Value *RHSBegin = RHSAddr.getPointer();
5236   llvm::Value *LHSBegin = LHSAddr.getPointer();
5237   // Cast from pointer to array type to pointer to single element.
5238   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5239   // The basic structure here is a while-do loop.
5240   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5241   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5242   llvm::Value *IsEmpty =
5243       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5244   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5245 
5246   // Enter the loop body, making that address the current address.
5247   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5248   CGF.EmitBlock(BodyBB);
5249 
5250   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5251 
5252   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5253       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5254   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5255   Address RHSElementCurrent =
5256       Address(RHSElementPHI,
5257               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5258 
5259   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5260       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5261   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5262   Address LHSElementCurrent =
5263       Address(LHSElementPHI,
5264               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5265 
5266   // Emit copy.
5267   CodeGenFunction::OMPPrivateScope Scope(CGF);
5268   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5269   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5270   Scope.Privatize();
5271   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5272   Scope.ForceCleanup();
5273 
5274   // Shift the address forward by one element.
5275   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5276       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5277   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5278       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5279   // Check whether we've reached the end.
5280   llvm::Value *Done =
5281       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5282   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5283   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5284   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5285 
5286   // Done.
5287   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5288 }
5289 
5290 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5291 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5292 /// UDR combiner function.
5293 static void emitReductionCombiner(CodeGenFunction &CGF,
5294                                   const Expr *ReductionOp) {
5295   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5296     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5297       if (const auto *DRE =
5298               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5299         if (const auto *DRD =
5300                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5301           std::pair<llvm::Function *, llvm::Function *> Reduction =
5302               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5303           RValue Func = RValue::get(Reduction.first);
5304           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5305           CGF.EmitIgnoredExpr(ReductionOp);
5306           return;
5307         }
5308   CGF.EmitIgnoredExpr(ReductionOp);
5309 }
5310 
5311 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5312     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5313     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5314     ArrayRef<const Expr *> ReductionOps) {
5315   ASTContext &C = CGM.getContext();
5316 
5317   // void reduction_func(void *LHSArg, void *RHSArg);
5318   FunctionArgList Args;
5319   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5320                            ImplicitParamDecl::Other);
5321   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5322                            ImplicitParamDecl::Other);
5323   Args.push_back(&LHSArg);
5324   Args.push_back(&RHSArg);
5325   const auto &CGFI =
5326       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5327   std::string Name = getName({"omp", "reduction", "reduction_func"});
5328   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5329                                     llvm::GlobalValue::InternalLinkage, Name,
5330                                     &CGM.getModule());
5331   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5332   Fn->setDoesNotRecurse();
5333   CodeGenFunction CGF(CGM);
5334   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5335 
5336   // Dst = (void*[n])(LHSArg);
5337   // Src = (void*[n])(RHSArg);
5338   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5339       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5340       ArgsType), CGF.getPointerAlign());
5341   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5342       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5343       ArgsType), CGF.getPointerAlign());
5344 
5345   //  ...
5346   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5347   //  ...
5348   CodeGenFunction::OMPPrivateScope Scope(CGF);
5349   auto IPriv = Privates.begin();
5350   unsigned Idx = 0;
5351   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5352     const auto *RHSVar =
5353         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5354     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5355       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5356     });
5357     const auto *LHSVar =
5358         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5359     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5360       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5361     });
5362     QualType PrivTy = (*IPriv)->getType();
5363     if (PrivTy->isVariablyModifiedType()) {
5364       // Get array size and emit VLA type.
5365       ++Idx;
5366       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5367       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5368       const VariableArrayType *VLA =
5369           CGF.getContext().getAsVariableArrayType(PrivTy);
5370       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5371       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5372           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5373       CGF.EmitVariablyModifiedType(PrivTy);
5374     }
5375   }
5376   Scope.Privatize();
5377   IPriv = Privates.begin();
5378   auto ILHS = LHSExprs.begin();
5379   auto IRHS = RHSExprs.begin();
5380   for (const Expr *E : ReductionOps) {
5381     if ((*IPriv)->getType()->isArrayType()) {
5382       // Emit reduction for array section.
5383       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5384       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5385       EmitOMPAggregateReduction(
5386           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5387           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5388             emitReductionCombiner(CGF, E);
5389           });
5390     } else {
5391       // Emit reduction for array subscript or single variable.
5392       emitReductionCombiner(CGF, E);
5393     }
5394     ++IPriv;
5395     ++ILHS;
5396     ++IRHS;
5397   }
5398   Scope.ForceCleanup();
5399   CGF.FinishFunction();
5400   return Fn;
5401 }
5402 
5403 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5404                                                   const Expr *ReductionOp,
5405                                                   const Expr *PrivateRef,
5406                                                   const DeclRefExpr *LHS,
5407                                                   const DeclRefExpr *RHS) {
5408   if (PrivateRef->getType()->isArrayType()) {
5409     // Emit reduction for array section.
5410     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5411     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5412     EmitOMPAggregateReduction(
5413         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5414         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5415           emitReductionCombiner(CGF, ReductionOp);
5416         });
5417   } else {
5418     // Emit reduction for array subscript or single variable.
5419     emitReductionCombiner(CGF, ReductionOp);
5420   }
5421 }
5422 
/// Emits the code that combines the reduction items of a construct.
///
/// \param CGF Function being emitted; nothing happens when it has no insert
/// point.
/// \param Loc Source location used for the runtime calls and atomic updates.
/// \param Privates Private copies; their types decide whether an item is
/// handled as an array and whether it needs an extra size slot.
/// \param LHSExprs, RHSExprs References (DeclRefExprs) to the helper variables
/// used by the reduction operations.
/// \param ReductionOps One combiner expression per reduction item.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction requests only the plain sequence of combiners without any
/// runtime call.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: emit the combiners back to back inside a
    // cleanup scope and return.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list has one slot per reduction item plus one additional slot for
  // every item whose private type is variably modified.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in the list; it runs ahead of I once a size slot has been
  // written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is stored in the next slot, encoded as a pointer
      // via inttoptr. This local 'Size' shadows the slot count above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of reduction items (RHSExprs.size()), while
  // sizeof(RedList) covers every slot of the list, size slots included.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 falls through to the default block, which
  // is also the join point of both cases.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body of case 1: the same plain combiner sequence that the
  // SimpleReduction path emits.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is built with no enter callee and the end-reduce entry point
  // as the exit callee.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body of case 2. Each reduction operation is classified:
  //  - an assignment 'x = <update>' is emitted through
  //    EmitOMPAtomicSimpleUpdateExpr;
  //  - anything else is emitted inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO stays BO_Comma unless a binary operator is found in the update
      // expression below. The 'BO' declared in the following 'if' condition
      // is a different variable (the BinaryOperator node) that shadows this
      // one only inside that 'if'.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Generator for one atomic update. The callback handed to
        // EmitOMPAtomicSimpleUpdateExpr stores the value it receives into a
        // temporary, makes VD refer to that temporary, and then evaluates
        // UpExpr.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          // Local RValue named E; unrelated to the loop variable E outside
          // this lambda. It stays default-constructed when EExpr is null.
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The region is named via getName({"atomic_reduction"}) and wraps
        // the plain combiner for E.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          // The generator ignores its expression arguments.
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    // Action is local to this branch, so the region is emitted here while
    // the action object is still alive.
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait the atomic case is emitted without an end-reduce call.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5729 
5730 /// Generates unique name for artificial threadprivate variables.
5731 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5732 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5733                                       const Expr *Ref) {
5734   SmallString<256> Buffer;
5735   llvm::raw_svector_ostream Out(Buffer);
5736   const clang::DeclRefExpr *DE;
5737   const VarDecl *D = ::getBaseDecl(Ref, DE);
5738   if (!D)
5739     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5740   D = D->getCanonicalDecl();
5741   std::string Name = CGM.getOpenMPRuntime().getName(
5742       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5743   Out << Prefix << Name << "_"
5744       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5745   return std::string(Out.str());
5746 }
5747 
5748 /// Emits reduction initializer function:
5749 /// \code
5750 /// void @.red_init(void* %arg, void* %orig) {
5751 /// %0 = bitcast void* %arg to <type>*
5752 /// store <type> <init>, <type>* %0
5753 /// ret void
5754 /// }
5755 /// \endcode
5756 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5757                                            SourceLocation Loc,
5758                                            ReductionCodeGen &RCG, unsigned N) {
5759   ASTContext &C = CGM.getContext();
5760   QualType VoidPtrTy = C.VoidPtrTy;
5761   VoidPtrTy.addRestrict();
5762   FunctionArgList Args;
5763   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5764                           ImplicitParamDecl::Other);
5765   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5766                               ImplicitParamDecl::Other);
5767   Args.emplace_back(&Param);
5768   Args.emplace_back(&ParamOrig);
5769   const auto &FnInfo =
5770       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5771   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5772   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5773   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5774                                     Name, &CGM.getModule());
5775   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5776   Fn->setDoesNotRecurse();
5777   CodeGenFunction CGF(CGM);
5778   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5779   Address PrivateAddr = CGF.EmitLoadOfPointer(
5780       CGF.GetAddrOfLocalVar(&Param),
5781       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5782   llvm::Value *Size = nullptr;
5783   // If the size of the reduction item is non-constant, load it from global
5784   // threadprivate variable.
5785   if (RCG.getSizes(N).second) {
5786     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5787         CGF, CGM.getContext().getSizeType(),
5788         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5789     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5790                                 CGM.getContext().getSizeType(), Loc);
5791   }
5792   RCG.emitAggregateType(CGF, N, Size);
5793   LValue OrigLVal;
5794   // If initializer uses initializer from declare reduction construct, emit a
5795   // pointer to the address of the original reduction item (reuired by reduction
5796   // initializer)
5797   if (RCG.usesReductionInitializer(N)) {
5798     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5799     SharedAddr = CGF.EmitLoadOfPointer(
5800         SharedAddr,
5801         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5802     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5803   } else {
5804     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5805         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5806         CGM.getContext().VoidPtrTy);
5807   }
5808   // Emit the initializer:
5809   // %0 = bitcast void* %arg to <type>*
5810   // store <type> <init>, <type>* %0
5811   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5812                          [](CodeGenFunction &) { return false; });
5813   CGF.FinishFunction();
5814   return Fn;
5815 }
5816 
5817 /// Emits reduction combiner function:
5818 /// \code
5819 /// void @.red_comb(void* %arg0, void* %arg1) {
5820 /// %lhs = bitcast void* %arg0 to <type>*
5821 /// %rhs = bitcast void* %arg1 to <type>*
5822 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5823 /// store <type> %2, <type>* %lhs
5824 /// ret void
5825 /// }
5826 /// \endcode
5827 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5828                                            SourceLocation Loc,
5829                                            ReductionCodeGen &RCG, unsigned N,
5830                                            const Expr *ReductionOp,
5831                                            const Expr *LHS, const Expr *RHS,
5832                                            const Expr *PrivateRef) {
5833   ASTContext &C = CGM.getContext();
5834   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5835   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5836   FunctionArgList Args;
5837   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5838                                C.VoidPtrTy, ImplicitParamDecl::Other);
5839   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5840                             ImplicitParamDecl::Other);
5841   Args.emplace_back(&ParamInOut);
5842   Args.emplace_back(&ParamIn);
5843   const auto &FnInfo =
5844       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5845   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5846   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5847   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5848                                     Name, &CGM.getModule());
5849   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5850   Fn->setDoesNotRecurse();
5851   CodeGenFunction CGF(CGM);
5852   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5853   llvm::Value *Size = nullptr;
5854   // If the size of the reduction item is non-constant, load it from global
5855   // threadprivate variable.
5856   if (RCG.getSizes(N).second) {
5857     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5858         CGF, CGM.getContext().getSizeType(),
5859         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5860     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5861                                 CGM.getContext().getSizeType(), Loc);
5862   }
5863   RCG.emitAggregateType(CGF, N, Size);
5864   // Remap lhs and rhs variables to the addresses of the function arguments.
5865   // %lhs = bitcast void* %arg0 to <type>*
5866   // %rhs = bitcast void* %arg1 to <type>*
5867   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5868   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5869     // Pull out the pointer to the variable.
5870     Address PtrAddr = CGF.EmitLoadOfPointer(
5871         CGF.GetAddrOfLocalVar(&ParamInOut),
5872         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5873     return CGF.Builder.CreateElementBitCast(
5874         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5875   });
5876   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5877     // Pull out the pointer to the variable.
5878     Address PtrAddr = CGF.EmitLoadOfPointer(
5879         CGF.GetAddrOfLocalVar(&ParamIn),
5880         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5881     return CGF.Builder.CreateElementBitCast(
5882         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5883   });
5884   PrivateScope.Privatize();
5885   // Emit the combiner body:
5886   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5887   // store <type> %2, <type>* %lhs
5888   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5889       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5890       cast<DeclRefExpr>(RHS));
5891   CGF.FinishFunction();
5892   return Fn;
5893 }
5894 
5895 /// Emits reduction finalizer function:
5896 /// \code
5897 /// void @.red_fini(void* %arg) {
5898 /// %0 = bitcast void* %arg to <type>*
5899 /// <destroy>(<type>* %0)
5900 /// ret void
5901 /// }
5902 /// \endcode
5903 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5904                                            SourceLocation Loc,
5905                                            ReductionCodeGen &RCG, unsigned N) {
5906   if (!RCG.needCleanups(N))
5907     return nullptr;
5908   ASTContext &C = CGM.getContext();
5909   FunctionArgList Args;
5910   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5911                           ImplicitParamDecl::Other);
5912   Args.emplace_back(&Param);
5913   const auto &FnInfo =
5914       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5915   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5916   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5917   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5918                                     Name, &CGM.getModule());
5919   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5920   Fn->setDoesNotRecurse();
5921   CodeGenFunction CGF(CGM);
5922   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5923   Address PrivateAddr = CGF.EmitLoadOfPointer(
5924       CGF.GetAddrOfLocalVar(&Param),
5925       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5926   llvm::Value *Size = nullptr;
5927   // If the size of the reduction item is non-constant, load it from global
5928   // threadprivate variable.
5929   if (RCG.getSizes(N).second) {
5930     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5931         CGF, CGM.getContext().getSizeType(),
5932         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5933     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5934                                 CGM.getContext().getSizeType(), Loc);
5935   }
5936   RCG.emitAggregateType(CGF, N, Size);
5937   // Emit the finalizer body:
5938   // <destroy>(<type>* %0)
5939   RCG.emitCleanups(CGF, N, PrivateAddr);
5940   CGF.FinishFunction(Loc);
5941   return Fn;
5942 }
5943 
5944 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5945     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5946     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5947   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5948     return nullptr;
5949 
5950   // Build typedef struct:
5951   // kmp_taskred_input {
5952   //   void *reduce_shar; // shared reduction item
5953   //   void *reduce_orig; // original reduction item used for initialization
5954   //   size_t reduce_size; // size of data item
5955   //   void *reduce_init; // data initialization routine
5956   //   void *reduce_fini; // data finalization routine
5957   //   void *reduce_comb; // data combiner routine
5958   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5959   // } kmp_taskred_input_t;
5960   ASTContext &C = CGM.getContext();
5961   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5962   RD->startDefinition();
5963   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5964   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5965   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5966   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5967   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5968   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5969   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5970       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5971   RD->completeDefinition();
5972   QualType RDType = C.getRecordType(RD);
5973   unsigned Size = Data.ReductionVars.size();
5974   llvm::APInt ArraySize(/*numBits=*/64, Size);
5975   QualType ArrayRDType = C.getConstantArrayType(
5976       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5977   // kmp_task_red_input_t .rd_input.[Size];
5978   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5979   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5980                        Data.ReductionCopies, Data.ReductionOps);
5981   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5982     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5983     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5984                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5985     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5986         TaskRedInput.getPointer(), Idxs,
5987         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5988         ".rd_input.gep.");
5989     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5990     // ElemLVal.reduce_shar = &Shareds[Cnt];
5991     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5992     RCG.emitSharedOrigLValue(CGF, Cnt);
5993     llvm::Value *CastedShared =
5994         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5995     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5996     // ElemLVal.reduce_orig = &Origs[Cnt];
5997     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5998     llvm::Value *CastedOrig =
5999         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6000     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6001     RCG.emitAggregateType(CGF, Cnt);
6002     llvm::Value *SizeValInChars;
6003     llvm::Value *SizeVal;
6004     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6005     // We use delayed creation/initialization for VLAs and array sections. It is
6006     // required because runtime does not provide the way to pass the sizes of
6007     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6008     // threadprivate global variables are used to store these values and use
6009     // them in the functions.
6010     bool DelayedCreation = !!SizeVal;
6011     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6012                                                /*isSigned=*/false);
6013     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6014     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6015     // ElemLVal.reduce_init = init;
6016     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6017     llvm::Value *InitAddr =
6018         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6019     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6020     // ElemLVal.reduce_fini = fini;
6021     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6022     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6023     llvm::Value *FiniAddr = Fini
6024                                 ? CGF.EmitCastToVoidPtr(Fini)
6025                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6026     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6027     // ElemLVal.reduce_comb = comb;
6028     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6029     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6030         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6031         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6032     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6033     // ElemLVal.flags = 0;
6034     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6035     if (DelayedCreation) {
6036       CGF.EmitStoreOfScalar(
6037           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6038           FlagsLVal);
6039     } else
6040       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6041                                  FlagsLVal.getType());
6042   }
6043   if (Data.IsReductionWithTaskMod) {
6044     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6045     // is_ws, int num, void *data);
6046     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6047     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6048                                                   CGM.IntTy, /*isSigned=*/true);
6049     llvm::Value *Args[] = {
6050         IdentTLoc, GTid,
6051         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6052                                /*isSigned=*/true),
6053         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6054         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6055             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6056     return CGF.EmitRuntimeCall(
6057         OMPBuilder.getOrCreateRuntimeFunction(
6058             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6059         Args);
6060   }
6061   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6062   llvm::Value *Args[] = {
6063       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6064                                 /*isSigned=*/true),
6065       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6066       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6067                                                       CGM.VoidPtrTy)};
6068   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6069                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6070                              Args);
6071 }
6072 
6073 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6074                                             SourceLocation Loc,
6075                                             bool IsWorksharingReduction) {
6076   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6077   // is_ws, int num, void *data);
6078   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6079   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6080                                                 CGM.IntTy, /*isSigned=*/true);
6081   llvm::Value *Args[] = {IdentTLoc, GTid,
6082                          llvm::ConstantInt::get(CGM.IntTy,
6083                                                 IsWorksharingReduction ? 1 : 0,
6084                                                 /*isSigned=*/true)};
6085   (void)CGF.EmitRuntimeCall(
6086       OMPBuilder.getOrCreateRuntimeFunction(
6087           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6088       Args);
6089 }
6090 
6091 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6092                                               SourceLocation Loc,
6093                                               ReductionCodeGen &RCG,
6094                                               unsigned N) {
6095   auto Sizes = RCG.getSizes(N);
6096   // Emit threadprivate global variable if the type is non-constant
6097   // (Sizes.second = nullptr).
6098   if (Sizes.second) {
6099     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6100                                                      /*isSigned=*/false);
6101     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6102         CGF, CGM.getContext().getSizeType(),
6103         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6104     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6105   }
6106 }
6107 
6108 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6109                                               SourceLocation Loc,
6110                                               llvm::Value *ReductionsPtr,
6111                                               LValue SharedLVal) {
6112   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6113   // *d);
6114   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6115                                                    CGM.IntTy,
6116                                                    /*isSigned=*/true),
6117                          ReductionsPtr,
6118                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6119                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6120   return Address(
6121       CGF.EmitRuntimeCall(
6122           OMPBuilder.getOrCreateRuntimeFunction(
6123               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6124           Args),
6125       SharedLVal.getAlignment());
6126 }
6127 
6128 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6129                                        SourceLocation Loc) {
6130   if (!CGF.HaveInsertPoint())
6131     return;
6132 
6133   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6134     OMPBuilder.CreateTaskwait(CGF.Builder);
6135   } else {
6136     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6137     // global_tid);
6138     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6139     // Ignore return result until untied tasks are supported.
6140     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6141                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6142                         Args);
6143   }
6144 
6145   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6146     Region->emitUntiedSwitch(CGF);
6147 }
6148 
6149 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6150                                            OpenMPDirectiveKind InnerKind,
6151                                            const RegionCodeGenTy &CodeGen,
6152                                            bool HasCancel) {
6153   if (!CGF.HaveInsertPoint())
6154     return;
6155   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6156   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6157 }
6158 
namespace {
/// Cancellation kinds. In this file the values are produced by
/// getCancellationKind() and passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Never returned by getCancellationKind() for the directive kinds it
  /// accepts.
  CancelNoreq = 0,
  /// Used when the cancel region is OMPD_parallel.
  CancelParallel = 1,
  /// Used when the cancel region is OMPD_for.
  CancelLoop = 2,
  /// Used when the cancel region is OMPD_sections.
  CancelSections = 3,
  /// Used when the cancel region is OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6168 
6169 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6170   RTCancelKind CancelKind = CancelNoreq;
6171   if (CancelRegion == OMPD_parallel)
6172     CancelKind = CancelParallel;
6173   else if (CancelRegion == OMPD_for)
6174     CancelKind = CancelLoop;
6175   else if (CancelRegion == OMPD_sections)
6176     CancelKind = CancelSections;
6177   else {
6178     assert(CancelRegion == OMPD_taskgroup);
6179     CancelKind = CancelTaskgroup;
6180   }
6181   return CancelKind;
6182 }
6183 
6184 void CGOpenMPRuntime::emitCancellationPointCall(
6185     CodeGenFunction &CGF, SourceLocation Loc,
6186     OpenMPDirectiveKind CancelRegion) {
6187   if (!CGF.HaveInsertPoint())
6188     return;
6189   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6190   // global_tid, kmp_int32 cncl_kind);
6191   if (auto *OMPRegionInfo =
6192           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6193     // For 'cancellation point taskgroup', the task region info may not have a
6194     // cancel. This may instead happen in another adjacent task.
6195     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6196       llvm::Value *Args[] = {
6197           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6198           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6199       // Ignore return result until untied tasks are supported.
6200       llvm::Value *Result = CGF.EmitRuntimeCall(
6201           OMPBuilder.getOrCreateRuntimeFunction(
6202               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6203           Args);
6204       // if (__kmpc_cancellationpoint()) {
6205       //   exit from construct;
6206       // }
6207       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6208       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6209       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6210       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6211       CGF.EmitBlock(ExitBB);
6212       // exit from construct;
6213       CodeGenFunction::JumpDest CancelDest =
6214           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6215       CGF.EmitBranchThroughCleanup(CancelDest);
6216       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6217     }
6218   }
6219 }
6220 
6221 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6222                                      const Expr *IfCond,
6223                                      OpenMPDirectiveKind CancelRegion) {
6224   if (!CGF.HaveInsertPoint())
6225     return;
6226   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6227   // kmp_int32 cncl_kind);
6228   auto &M = CGM.getModule();
6229   if (auto *OMPRegionInfo =
6230           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6231     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6232                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6233       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6234       llvm::Value *Args[] = {
6235           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6236           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6237       // Ignore return result until untied tasks are supported.
6238       llvm::Value *Result = CGF.EmitRuntimeCall(
6239           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6240       // if (__kmpc_cancel()) {
6241       //   exit from construct;
6242       // }
6243       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6244       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6245       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6246       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6247       CGF.EmitBlock(ExitBB);
6248       // exit from construct;
6249       CodeGenFunction::JumpDest CancelDest =
6250           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6251       CGF.EmitBranchThroughCleanup(CancelDest);
6252       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6253     };
6254     if (IfCond) {
6255       emitIfClause(CGF, IfCond, ThenGen,
6256                    [](CodeGenFunction &, PrePostActionTy &) {});
6257     } else {
6258       RegionCodeGenTy ThenRCG(ThenGen);
6259       ThenRCG(CGF);
6260     }
6261   }
6262 }
6263 
6264 namespace {
6265 /// Cleanup action for uses_allocators support.
6266 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6267   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6268 
6269 public:
6270   OMPUsesAllocatorsActionTy(
6271       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6272       : Allocators(Allocators) {}
6273   void Enter(CodeGenFunction &CGF) override {
6274     if (!CGF.HaveInsertPoint())
6275       return;
6276     for (const auto &AllocatorData : Allocators) {
6277       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6278           CGF, AllocatorData.first, AllocatorData.second);
6279     }
6280   }
6281   void Exit(CodeGenFunction &CGF) override {
6282     if (!CGF.HaveInsertPoint())
6283       return;
6284     for (const auto &AllocatorData : Allocators) {
6285       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6286                                                         AllocatorData.first);
6287     }
6288   }
6289 };
6290 } // namespace
6291 
6292 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6293     const OMPExecutableDirective &D, StringRef ParentName,
6294     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6295     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6296   assert(!ParentName.empty() && "Invalid target region parent name!");
6297   HasEmittedTargetRegion = true;
6298   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6299   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6300     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6301       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6302       if (!D.AllocatorTraits)
6303         continue;
6304       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6305     }
6306   }
6307   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6308   CodeGen.setAction(UsesAllocatorAction);
6309   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6310                                    IsOffloadEntry, CodeGen);
6311 }
6312 
6313 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6314                                              const Expr *Allocator,
6315                                              const Expr *AllocatorTraits) {
6316   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6317   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6318   // Use default memspace handle.
6319   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6320   llvm::Value *NumTraits = llvm::ConstantInt::get(
6321       CGF.IntTy, cast<ConstantArrayType>(
6322                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6323                      ->getSize()
6324                      .getLimitedValue());
6325   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6326   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6327       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6328   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6329                                            AllocatorTraitsLVal.getBaseInfo(),
6330                                            AllocatorTraitsLVal.getTBAAInfo());
6331   llvm::Value *Traits =
6332       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6333 
6334   llvm::Value *AllocatorVal =
6335       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6336                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6337                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6338   // Store to allocator.
6339   CGF.EmitVarDecl(*cast<VarDecl>(
6340       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6341   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6342   AllocatorVal =
6343       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6344                                Allocator->getType(), Allocator->getExprLoc());
6345   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6346 }
6347 
6348 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6349                                              const Expr *Allocator) {
6350   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6351   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6352   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6353   llvm::Value *AllocatorVal =
6354       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6355   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6356                                           CGF.getContext().VoidPtrTy,
6357                                           Allocator->getExprLoc());
6358   (void)CGF.EmitRuntimeCall(
6359       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6360                                             OMPRTL___kmpc_destroy_allocator),
6361       {ThreadId, AllocatorVal});
6362 }
6363 
6364 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6365     const OMPExecutableDirective &D, StringRef ParentName,
6366     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6367     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6368   // Create a unique name for the entry function using the source location
6369   // information of the current target region. The name will be something like:
6370   //
6371   // __omp_offloading_DD_FFFF_PP_lBB
6372   //
6373   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6374   // mangled name of the function that encloses the target region and BB is the
6375   // line number of the target region.
6376 
6377   unsigned DeviceID;
6378   unsigned FileID;
6379   unsigned Line;
6380   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6381                            Line);
6382   SmallString<64> EntryFnName;
6383   {
6384     llvm::raw_svector_ostream OS(EntryFnName);
6385     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6386        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6387   }
6388 
6389   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6390 
6391   CodeGenFunction CGF(CGM, true);
6392   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6393   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6394 
6395   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6396 
6397   // If this target outline function is not an offload entry, we don't need to
6398   // register it.
6399   if (!IsOffloadEntry)
6400     return;
6401 
6402   // The target region ID is used by the runtime library to identify the current
6403   // target region, so it only has to be unique and not necessarily point to
6404   // anything. It could be the pointer to the outlined function that implements
6405   // the target region, but we aren't using that so that the compiler doesn't
6406   // need to keep that, and could therefore inline the host function if proven
6407   // worthwhile during optimization. In the other hand, if emitting code for the
6408   // device, the ID has to be the function address so that it can retrieved from
6409   // the offloading entry and launched by the runtime library. We also mark the
6410   // outlined function to have external linkage in case we are emitting code for
6411   // the device, because these functions will be entry points to the device.
6412 
6413   if (CGM.getLangOpts().OpenMPIsDevice) {
6414     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6415     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6416     OutlinedFn->setDSOLocal(false);
6417   } else {
6418     std::string Name = getName({EntryFnName, "region_id"});
6419     OutlinedFnID = new llvm::GlobalVariable(
6420         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6421         llvm::GlobalValue::WeakAnyLinkage,
6422         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6423   }
6424 
6425   // Register the information for the entry associated with this target region.
6426   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6427       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6428       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6429 }
6430 
6431 /// Checks if the expression is constant or does not have non-trivial function
6432 /// calls.
6433 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6434   // We can skip constant expressions.
6435   // We can skip expressions with trivial calls or simple expressions.
6436   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6437           !E->hasNonTrivialCall(Ctx)) &&
6438          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6439 }
6440 
6441 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6442                                                     const Stmt *Body) {
6443   const Stmt *Child = Body->IgnoreContainers();
6444   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6445     Child = nullptr;
6446     for (const Stmt *S : C->body()) {
6447       if (const auto *E = dyn_cast<Expr>(S)) {
6448         if (isTrivial(Ctx, E))
6449           continue;
6450       }
6451       // Some of the statements can be ignored.
6452       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6453           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6454         continue;
6455       // Analyze declarations.
6456       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6457         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6458               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6459                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6460                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6461                   isa<UsingDirectiveDecl>(D) ||
6462                   isa<OMPDeclareReductionDecl>(D) ||
6463                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6464                 return true;
6465               const auto *VD = dyn_cast<VarDecl>(D);
6466               if (!VD)
6467                 return false;
6468               return VD->isConstexpr() ||
6469                      ((VD->getType().isTrivialType(Ctx) ||
6470                        VD->getType()->isReferenceType()) &&
6471                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6472             }))
6473           continue;
6474       }
6475       // Found multiple children - cannot get the one child only.
6476       if (Child)
6477         return nullptr;
6478       Child = S;
6479     }
6480     if (Child)
6481       Child = Child->IgnoreContainers();
6482   }
6483   return Child;
6484 }
6485 
6486 /// Emit the number of teams for a target directive.  Inspect the num_teams
6487 /// clause associated with a teams construct combined or closely nested
6488 /// with the target directive.
6489 ///
6490 /// Emit a team of size one for directives such as 'target parallel' that
6491 /// have no associated teams construct.
6492 ///
6493 /// Otherwise, return nullptr.
6494 static llvm::Value *
6495 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6496                                const OMPExecutableDirective &D) {
6497   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6498          "Clauses associated with the teams directive expected to be emitted "
6499          "only for the host!");
6500   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6501   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6502          "Expected target-based executable directive.");
6503   CGBuilderTy &Bld = CGF.Builder;
6504   switch (DirectiveKind) {
6505   case OMPD_target: {
6506     const auto *CS = D.getInnermostCapturedStmt();
6507     const auto *Body =
6508         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6509     const Stmt *ChildStmt =
6510         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6511     if (const auto *NestedDir =
6512             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6513       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6514         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6515           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6516           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6517           const Expr *NumTeams =
6518               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6519           llvm::Value *NumTeamsVal =
6520               CGF.EmitScalarExpr(NumTeams,
6521                                  /*IgnoreResultAssign*/ true);
6522           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6523                                    /*isSigned=*/true);
6524         }
6525         return Bld.getInt32(0);
6526       }
6527       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6528           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6529         return Bld.getInt32(1);
6530       return Bld.getInt32(0);
6531     }
6532     return nullptr;
6533   }
6534   case OMPD_target_teams:
6535   case OMPD_target_teams_distribute:
6536   case OMPD_target_teams_distribute_simd:
6537   case OMPD_target_teams_distribute_parallel_for:
6538   case OMPD_target_teams_distribute_parallel_for_simd: {
6539     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6540       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6541       const Expr *NumTeams =
6542           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6543       llvm::Value *NumTeamsVal =
6544           CGF.EmitScalarExpr(NumTeams,
6545                              /*IgnoreResultAssign*/ true);
6546       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6547                                /*isSigned=*/true);
6548     }
6549     return Bld.getInt32(0);
6550   }
6551   case OMPD_target_parallel:
6552   case OMPD_target_parallel_for:
6553   case OMPD_target_parallel_for_simd:
6554   case OMPD_target_simd:
6555     return Bld.getInt32(1);
6556   case OMPD_parallel:
6557   case OMPD_for:
6558   case OMPD_parallel_for:
6559   case OMPD_parallel_master:
6560   case OMPD_parallel_sections:
6561   case OMPD_for_simd:
6562   case OMPD_parallel_for_simd:
6563   case OMPD_cancel:
6564   case OMPD_cancellation_point:
6565   case OMPD_ordered:
6566   case OMPD_threadprivate:
6567   case OMPD_allocate:
6568   case OMPD_task:
6569   case OMPD_simd:
6570   case OMPD_sections:
6571   case OMPD_section:
6572   case OMPD_single:
6573   case OMPD_master:
6574   case OMPD_critical:
6575   case OMPD_taskyield:
6576   case OMPD_barrier:
6577   case OMPD_taskwait:
6578   case OMPD_taskgroup:
6579   case OMPD_atomic:
6580   case OMPD_flush:
6581   case OMPD_depobj:
6582   case OMPD_scan:
6583   case OMPD_teams:
6584   case OMPD_target_data:
6585   case OMPD_target_exit_data:
6586   case OMPD_target_enter_data:
6587   case OMPD_distribute:
6588   case OMPD_distribute_simd:
6589   case OMPD_distribute_parallel_for:
6590   case OMPD_distribute_parallel_for_simd:
6591   case OMPD_teams_distribute:
6592   case OMPD_teams_distribute_simd:
6593   case OMPD_teams_distribute_parallel_for:
6594   case OMPD_teams_distribute_parallel_for_simd:
6595   case OMPD_target_update:
6596   case OMPD_declare_simd:
6597   case OMPD_declare_variant:
6598   case OMPD_begin_declare_variant:
6599   case OMPD_end_declare_variant:
6600   case OMPD_declare_target:
6601   case OMPD_end_declare_target:
6602   case OMPD_declare_reduction:
6603   case OMPD_declare_mapper:
6604   case OMPD_taskloop:
6605   case OMPD_taskloop_simd:
6606   case OMPD_master_taskloop:
6607   case OMPD_master_taskloop_simd:
6608   case OMPD_parallel_master_taskloop:
6609   case OMPD_parallel_master_taskloop_simd:
6610   case OMPD_requires:
6611   case OMPD_unknown:
6612     break;
6613   default:
6614     break;
6615   }
6616   llvm_unreachable("Unexpected directive kind.");
6617 }
6618 
6619 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6620                                   llvm::Value *DefaultThreadLimitVal) {
6621   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6622       CGF.getContext(), CS->getCapturedStmt());
6623   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6624     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6625       llvm::Value *NumThreads = nullptr;
6626       llvm::Value *CondVal = nullptr;
6627       // Handle if clause. If if clause present, the number of threads is
6628       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6629       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6630         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6631         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6632         const OMPIfClause *IfClause = nullptr;
6633         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6634           if (C->getNameModifier() == OMPD_unknown ||
6635               C->getNameModifier() == OMPD_parallel) {
6636             IfClause = C;
6637             break;
6638           }
6639         }
6640         if (IfClause) {
6641           const Expr *Cond = IfClause->getCondition();
6642           bool Result;
6643           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6644             if (!Result)
6645               return CGF.Builder.getInt32(1);
6646           } else {
6647             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6648             if (const auto *PreInit =
6649                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6650               for (const auto *I : PreInit->decls()) {
6651                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6652                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6653                 } else {
6654                   CodeGenFunction::AutoVarEmission Emission =
6655                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6656                   CGF.EmitAutoVarCleanups(Emission);
6657                 }
6658               }
6659             }
6660             CondVal = CGF.EvaluateExprAsBool(Cond);
6661           }
6662         }
6663       }
6664       // Check the value of num_threads clause iff if clause was not specified
6665       // or is not evaluated to false.
6666       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6667         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6668         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6669         const auto *NumThreadsClause =
6670             Dir->getSingleClause<OMPNumThreadsClause>();
6671         CodeGenFunction::LexicalScope Scope(
6672             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6673         if (const auto *PreInit =
6674                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6675           for (const auto *I : PreInit->decls()) {
6676             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6677               CGF.EmitVarDecl(cast<VarDecl>(*I));
6678             } else {
6679               CodeGenFunction::AutoVarEmission Emission =
6680                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6681               CGF.EmitAutoVarCleanups(Emission);
6682             }
6683           }
6684         }
6685         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6686         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6687                                                /*isSigned=*/false);
6688         if (DefaultThreadLimitVal)
6689           NumThreads = CGF.Builder.CreateSelect(
6690               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6691               DefaultThreadLimitVal, NumThreads);
6692       } else {
6693         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6694                                            : CGF.Builder.getInt32(0);
6695       }
6696       // Process condition of the if clause.
6697       if (CondVal) {
6698         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6699                                               CGF.Builder.getInt32(1));
6700       }
6701       return NumThreads;
6702     }
6703     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6704       return CGF.Builder.getInt32(1);
6705     return DefaultThreadLimitVal;
6706   }
6707   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6708                                : CGF.Builder.getInt32(0);
6709 }
6710 
6711 /// Emit the number of threads for a target directive.  Inspect the
6712 /// thread_limit clause associated with a teams construct combined or closely
6713 /// nested with the target directive.
6714 ///
6715 /// Emit the num_threads clause for directives such as 'target parallel' that
6716 /// have no associated teams construct.
6717 ///
6718 /// Otherwise, return nullptr.
6719 static llvm::Value *
6720 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6721                                  const OMPExecutableDirective &D) {
6722   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6723          "Clauses associated with the teams directive expected to be emitted "
6724          "only for the host!");
6725   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6726   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6727          "Expected target-based executable directive.");
6728   CGBuilderTy &Bld = CGF.Builder;
6729   llvm::Value *ThreadLimitVal = nullptr;
6730   llvm::Value *NumThreadsVal = nullptr;
6731   switch (DirectiveKind) {
6732   case OMPD_target: {
6733     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6734     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6735       return NumThreads;
6736     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6737         CGF.getContext(), CS->getCapturedStmt());
6738     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6739       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6740         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6741         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6742         const auto *ThreadLimitClause =
6743             Dir->getSingleClause<OMPThreadLimitClause>();
6744         CodeGenFunction::LexicalScope Scope(
6745             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6746         if (const auto *PreInit =
6747                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6748           for (const auto *I : PreInit->decls()) {
6749             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6750               CGF.EmitVarDecl(cast<VarDecl>(*I));
6751             } else {
6752               CodeGenFunction::AutoVarEmission Emission =
6753                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6754               CGF.EmitAutoVarCleanups(Emission);
6755             }
6756           }
6757         }
6758         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6759             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6760         ThreadLimitVal =
6761             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6762       }
6763       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6764           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6765         CS = Dir->getInnermostCapturedStmt();
6766         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6767             CGF.getContext(), CS->getCapturedStmt());
6768         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6769       }
6770       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6771           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6772         CS = Dir->getInnermostCapturedStmt();
6773         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6774           return NumThreads;
6775       }
6776       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6777         return Bld.getInt32(1);
6778     }
6779     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6780   }
6781   case OMPD_target_teams: {
6782     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6783       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6784       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6785       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6786           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6787       ThreadLimitVal =
6788           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6789     }
6790     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6791     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6792       return NumThreads;
6793     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6794         CGF.getContext(), CS->getCapturedStmt());
6795     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6796       if (Dir->getDirectiveKind() == OMPD_distribute) {
6797         CS = Dir->getInnermostCapturedStmt();
6798         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6799           return NumThreads;
6800       }
6801     }
6802     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6803   }
6804   case OMPD_target_teams_distribute:
6805     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6806       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6807       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6808       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6809           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6810       ThreadLimitVal =
6811           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6812     }
6813     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6814   case OMPD_target_parallel:
6815   case OMPD_target_parallel_for:
6816   case OMPD_target_parallel_for_simd:
6817   case OMPD_target_teams_distribute_parallel_for:
6818   case OMPD_target_teams_distribute_parallel_for_simd: {
6819     llvm::Value *CondVal = nullptr;
6820     // Handle if clause. If if clause present, the number of threads is
6821     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6822     if (D.hasClausesOfKind<OMPIfClause>()) {
6823       const OMPIfClause *IfClause = nullptr;
6824       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6825         if (C->getNameModifier() == OMPD_unknown ||
6826             C->getNameModifier() == OMPD_parallel) {
6827           IfClause = C;
6828           break;
6829         }
6830       }
6831       if (IfClause) {
6832         const Expr *Cond = IfClause->getCondition();
6833         bool Result;
6834         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6835           if (!Result)
6836             return Bld.getInt32(1);
6837         } else {
6838           CodeGenFunction::RunCleanupsScope Scope(CGF);
6839           CondVal = CGF.EvaluateExprAsBool(Cond);
6840         }
6841       }
6842     }
6843     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6844       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6845       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6846       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6847           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6848       ThreadLimitVal =
6849           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6850     }
6851     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6852       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6853       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6854       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6855           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6856       NumThreadsVal =
6857           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6858       ThreadLimitVal = ThreadLimitVal
6859                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6860                                                                 ThreadLimitVal),
6861                                               NumThreadsVal, ThreadLimitVal)
6862                            : NumThreadsVal;
6863     }
6864     if (!ThreadLimitVal)
6865       ThreadLimitVal = Bld.getInt32(0);
6866     if (CondVal)
6867       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6868     return ThreadLimitVal;
6869   }
6870   case OMPD_target_teams_distribute_simd:
6871   case OMPD_target_simd:
6872     return Bld.getInt32(1);
6873   case OMPD_parallel:
6874   case OMPD_for:
6875   case OMPD_parallel_for:
6876   case OMPD_parallel_master:
6877   case OMPD_parallel_sections:
6878   case OMPD_for_simd:
6879   case OMPD_parallel_for_simd:
6880   case OMPD_cancel:
6881   case OMPD_cancellation_point:
6882   case OMPD_ordered:
6883   case OMPD_threadprivate:
6884   case OMPD_allocate:
6885   case OMPD_task:
6886   case OMPD_simd:
6887   case OMPD_sections:
6888   case OMPD_section:
6889   case OMPD_single:
6890   case OMPD_master:
6891   case OMPD_critical:
6892   case OMPD_taskyield:
6893   case OMPD_barrier:
6894   case OMPD_taskwait:
6895   case OMPD_taskgroup:
6896   case OMPD_atomic:
6897   case OMPD_flush:
6898   case OMPD_depobj:
6899   case OMPD_scan:
6900   case OMPD_teams:
6901   case OMPD_target_data:
6902   case OMPD_target_exit_data:
6903   case OMPD_target_enter_data:
6904   case OMPD_distribute:
6905   case OMPD_distribute_simd:
6906   case OMPD_distribute_parallel_for:
6907   case OMPD_distribute_parallel_for_simd:
6908   case OMPD_teams_distribute:
6909   case OMPD_teams_distribute_simd:
6910   case OMPD_teams_distribute_parallel_for:
6911   case OMPD_teams_distribute_parallel_for_simd:
6912   case OMPD_target_update:
6913   case OMPD_declare_simd:
6914   case OMPD_declare_variant:
6915   case OMPD_begin_declare_variant:
6916   case OMPD_end_declare_variant:
6917   case OMPD_declare_target:
6918   case OMPD_end_declare_target:
6919   case OMPD_declare_reduction:
6920   case OMPD_declare_mapper:
6921   case OMPD_taskloop:
6922   case OMPD_taskloop_simd:
6923   case OMPD_master_taskloop:
6924   case OMPD_master_taskloop_simd:
6925   case OMPD_parallel_master_taskloop:
6926   case OMPD_parallel_master_taskloop_simd:
6927   case OMPD_requires:
6928   case OMPD_unknown:
6929     break;
6930   default:
6931     break;
6932   }
6933   llvm_unreachable("Unsupported directive kind.");
6934 }
6935 
6936 namespace {
6937 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6938 
6939 // Utility to handle information from clauses associated with a given
6940 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6941 // It provides a convenient interface to obtain the information and generate
6942 // code for that information.
6943 class MappableExprsHandler {
6944 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these values are presumably shared with the offloading
  /// runtime library; confirm before renumbering any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // 0x800 is reserved for compatibility with XLC, so no enumerator uses it.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
6988 
6989   /// Get the offset of the OMP_MAP_MEMBER_OF field.
6990   static unsigned getFlagMemberOffset() {
6991     unsigned Offset = 0;
6992     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
6993          Remain = Remain >> 1)
6994       Offset++;
6995     return Offset;
6996   }
6997 
6998   /// Class that associates information with a base pointer to be passed to the
6999   /// runtime library.
7000   class BasePointerInfo {
7001     /// The base pointer.
7002     llvm::Value *Ptr = nullptr;
7003     /// The base declaration that refers to this device pointer, or null if
7004     /// there is none.
7005     const ValueDecl *DevPtrDecl = nullptr;
7006 
7007   public:
7008     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7009         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7010     llvm::Value *operator*() const { return Ptr; }
7011     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7012     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7013   };
7014 
  /// List of base pointers, each with its optional device pointer decl.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of IR values (used below for both pointers and sizes).
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of map-type flag words.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// List of user-defined mapper declarations.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7019 
7020   /// This structure contains combined information generated for mappable
7021   /// clauses, including base pointers, pointers, sizes, map types, and
7022   /// user-defined mappers.
7023   struct MapCombinedInfoTy {
7024     MapBaseValuesArrayTy BasePointers;
7025     MapValuesArrayTy Pointers;
7026     MapValuesArrayTy Sizes;
7027     MapFlagsArrayTy Types;
7028     MapMappersArrayTy Mappers;
7029 
7030     /// Append arrays in \a CurInfo.
7031     void append(MapCombinedInfoTy &CurInfo) {
7032       BasePointers.append(CurInfo.BasePointers.begin(),
7033                           CurInfo.BasePointers.end());
7034       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7035       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7036       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7037       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7038     }
7039   };
7040 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// All addresses start out invalid and both field indices start at 0.
  struct StructRangeInfoTy {
    /// Lowest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// NOTE(review): presumably the address of the enclosing struct itself;
    /// confirm against the code that fills this in.
    Address Base = Address::invalid();
  };
7052 
7053 private:
  /// Record describing one mappable-expression component list together with
  /// the map type, modifiers, mapper and flags that go with it.
  ///
  /// NOTE(review): the previous summary ("Kind that defines how a device
  /// pointer has to be returned") looks stale; only ReturnDevicePointer
  /// relates to returning a device pointer.
  struct MapInfo {
    /// The expression components this entry refers to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers. This is a non-owning ArrayRef, so the referenced
    /// storage must outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers. Non-owning as well.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// NOTE(review): presumably requests that the runtime hand back the
    /// device pointer for this entry (cf. OMP_MAP_RETURN_PARAM); confirm at
    /// the use sites.
    bool ReturnDevicePointer = false;
    /// NOTE(review): presumably true when the map was not written explicitly
    /// by the user; confirm at the use sites.
    bool IsImplicit = false;
    /// User-defined mapper for the entry, or null.
    const ValueDecl *Mapper = nullptr;
    /// NOTE(review): presumably set for entries originating from
    /// use_device_addr rather than use_device_ptr; confirm at the use sites.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Member-wise constructor; every argument is stored unchanged.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
  };
7078 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  ///
  /// This record holds what is needed to emit such an entry later.
  struct DeferredDevicePtrEntryTy {
    /// NOTE(review): presumably the clause expression naming the member;
    /// confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    /// The declaration the deferred entry is about.
    const ValueDecl *VD = nullptr;
    /// NOTE(review): presumably true for use_device_addr and false for
    /// use_device_ptr; confirm against the code that creates these entries.
    bool ForDeviceAddr = false;

    /// Member-wise constructor; every argument is stored unchanged.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7091 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7094   llvm::PointerUnion<const OMPExecutableDirective *,
7095                      const OMPDeclareMapperDecl *>
7096       CurDir;
7097 
7098   /// Function the directive is being generated for.
7099   CodeGenFunction &CGF;
7100 
7101   /// Set of all first private variables in the current directive.
7102   /// bool data is set to true if the variable is implicitly marked as
7103   /// firstprivate, false otherwise.
7104   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7105 
7106   /// Map between device pointer declarations and their expression components.
7107   /// The key value for declarations in 'this' is null.
7108   llvm::DenseMap<
7109       const ValueDecl *,
7110       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7111       DevPointersMap;
7112 
7113   llvm::Value *getExprTypeSize(const Expr *E) const {
7114     QualType ExprTy = E->getType().getCanonicalType();
7115 
7116     // Calculate the size for array shaping expression.
7117     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7118       llvm::Value *Size =
7119           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7120       for (const Expr *SE : OAE->getDimensions()) {
7121         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7122         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7123                                       CGF.getContext().getSizeType(),
7124                                       SE->getExprLoc());
7125         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7126       }
7127       return Size;
7128     }
7129 
7130     // Reference types are ignored for mapping purposes.
7131     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7132       ExprTy = RefTy->getPointeeType().getCanonicalType();
7133 
7134     // Given that an array section is considered a built-in type, we need to
7135     // do the calculation based on the length of the section instead of relying
7136     // on CGF.getTypeSize(E->getType()).
7137     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7138       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7139                             OAE->getBase()->IgnoreParenImpCasts())
7140                             .getCanonicalType();
7141 
7142       // If there is no length associated with the expression and lower bound is
7143       // not specified too, that means we are using the whole length of the
7144       // base.
7145       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7146           !OAE->getLowerBound())
7147         return CGF.getTypeSize(BaseTy);
7148 
7149       llvm::Value *ElemSize;
7150       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7151         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7152       } else {
7153         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7154         assert(ATy && "Expecting array type if not a pointer type.");
7155         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7156       }
7157 
7158       // If we don't have a length at this point, that is because we have an
7159       // array section with a single element.
7160       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7161         return ElemSize;
7162 
7163       if (const Expr *LenExpr = OAE->getLength()) {
7164         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7165         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7166                                              CGF.getContext().getSizeType(),
7167                                              LenExpr->getExprLoc());
7168         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7169       }
7170       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7171              OAE->getLowerBound() && "expected array_section[lb:].");
7172       // Size = sizetype - lb * elemtype;
7173       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7174       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7175       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7176                                        CGF.getContext().getSizeType(),
7177                                        OAE->getLowerBound()->getExprLoc());
7178       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7179       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7180       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7181       LengthVal = CGF.Builder.CreateSelect(
7182           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7183       return LengthVal;
7184     }
7185     return CGF.getTypeSize(ExprTy);
7186   }
7187 
7188   /// Return the corresponding bits for a given map clause modifier. Add
7189   /// a flag marking the map as a pointer if requested. Add a flag marking the
7190   /// map as the first one of a series of maps that relate to the same map
7191   /// expression.
7192   OpenMPOffloadMappingFlags getMapTypeBits(
7193       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7194       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7195       bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7196     OpenMPOffloadMappingFlags Bits =
7197         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7198     switch (MapType) {
7199     case OMPC_MAP_alloc:
7200     case OMPC_MAP_release:
7201       // alloc and release is the default behavior in the runtime library,  i.e.
7202       // if we don't pass any bits alloc/release that is what the runtime is
7203       // going to do. Therefore, we don't need to signal anything for these two
7204       // type modifiers.
7205       break;
7206     case OMPC_MAP_to:
7207       Bits |= OMP_MAP_TO;
7208       break;
7209     case OMPC_MAP_from:
7210       Bits |= OMP_MAP_FROM;
7211       break;
7212     case OMPC_MAP_tofrom:
7213       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7214       break;
7215     case OMPC_MAP_delete:
7216       Bits |= OMP_MAP_DELETE;
7217       break;
7218     case OMPC_MAP_unknown:
7219       llvm_unreachable("Unexpected map type!");
7220     }
7221     if (AddPtrFlag)
7222       Bits |= OMP_MAP_PTR_AND_OBJ;
7223     if (AddIsTargetParamFlag)
7224       Bits |= OMP_MAP_TARGET_PARAM;
7225     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7226         != MapModifiers.end())
7227       Bits |= OMP_MAP_ALWAYS;
7228     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7229         != MapModifiers.end())
7230       Bits |= OMP_MAP_CLOSE;
7231     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7232         != MapModifiers.end())
7233       Bits |= OMP_MAP_PRESENT;
7234     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7235         != MotionModifiers.end())
7236       Bits |= OMP_MAP_PRESENT;
7237     return Bits;
7238   }
7239 
7240   /// Return true if the provided expression is a final array section. A
7241   /// final array section, is one whose length can't be proved to be one.
7242   bool isFinalArraySectionExpression(const Expr *E) const {
7243     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7244 
7245     // It is not an array section and therefore not a unity-size one.
7246     if (!OASE)
7247       return false;
7248 
7249     // An array section with no colon always refer to a single element.
7250     if (OASE->getColonLocFirst().isInvalid())
7251       return false;
7252 
7253     const Expr *Length = OASE->getLength();
7254 
7255     // If we don't have a length we have to check if the array has size 1
7256     // for this dimension. Also, we should always expect a length if the
7257     // base type is pointer.
7258     if (!Length) {
7259       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7260                              OASE->getBase()->IgnoreParenImpCasts())
7261                              .getCanonicalType();
7262       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7263         return ATy->getSize().getSExtValue() != 1;
7264       // If we don't have a constant dimension length, we have to consider
7265       // the current section as having any size, so it is not necessarily
7266       // unitary. If it happen to be unity size, that's user fault.
7267       return true;
7268     }
7269 
7270     // Check if the length evaluates to 1.
7271     Expr::EvalResult Result;
7272     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7273       return true; // Can have more that size 1.
7274 
7275     llvm::APSInt ConstLength = Result.Val.getInt();
7276     return ConstLength.getSExtValue() != 1;
7277   }
7278 
7279   /// Generate the base pointers, section pointers, sizes, map type bits, and
7280   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7281   /// map type, map or motion modifiers, and expression components.
7282   /// \a IsFirstComponent should be set to true if the provided set of
7283   /// components is the first associated with a capture.
7284   void generateInfoForComponentList(
7285       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7286       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7287       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7288       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7289       bool IsFirstComponentList, bool IsImplicit,
7290       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7291       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7292           OverlappedElements = llvm::None) const {
7293     // The following summarizes what has to be generated for each map and the
7294     // types below. The generated information is expressed in this order:
7295     // base pointer, section pointer, size, flags
7296     // (to add to the ones that come from the map type and modifier).
7297     //
7298     // double d;
7299     // int i[100];
7300     // float *p;
7301     //
7302     // struct S1 {
7303     //   int i;
7304     //   float f[50];
7305     // }
7306     // struct S2 {
7307     //   int i;
7308     //   float f[50];
7309     //   S1 s;
7310     //   double *p;
7311     //   struct S2 *ps;
7312     // }
7313     // S2 s;
7314     // S2 *ps;
7315     //
7316     // map(d)
7317     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7318     //
7319     // map(i)
7320     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7321     //
7322     // map(i[1:23])
7323     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7324     //
7325     // map(p)
7326     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7327     //
7328     // map(p[1:24])
7329     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7330     // in unified shared memory mode or for local pointers
7331     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7332     //
7333     // map(s)
7334     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7335     //
7336     // map(s.i)
7337     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7338     //
7339     // map(s.s.f)
7340     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7341     //
7342     // map(s.p)
7343     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7344     //
7345     // map(to: s.p[:22])
7346     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7347     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7348     // &(s.p), &(s.p[0]), 22*sizeof(double),
7349     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7350     // (*) alloc space for struct members, only this is a target parameter
7351     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7352     //      optimizes this entry out, same in the examples below)
7353     // (***) map the pointee (map: to)
7354     //
7355     // map(s.ps)
7356     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7357     //
7358     // map(from: s.ps->s.i)
7359     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7360     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7361     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7362     //
7363     // map(to: s.ps->ps)
7364     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7365     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7366     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7367     //
7368     // map(s.ps->ps->ps)
7369     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7370     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7371     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7372     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7373     //
7374     // map(to: s.ps->ps->s.f[:22])
7375     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7376     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7377     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7378     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7379     //
7380     // map(ps)
7381     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7382     //
7383     // map(ps->i)
7384     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7385     //
7386     // map(ps->s.f)
7387     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7388     //
7389     // map(from: ps->p)
7390     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7391     //
7392     // map(to: ps->p[:22])
7393     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7394     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7395     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7396     //
7397     // map(ps->ps)
7398     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7399     //
7400     // map(from: ps->ps->s.i)
7401     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7402     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7403     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7404     //
7405     // map(from: ps->ps->ps)
7406     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7407     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7408     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7409     //
7410     // map(ps->ps->ps->ps)
7411     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7412     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7413     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7414     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7415     //
7416     // map(to: ps->ps->ps->s.f[:22])
7417     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7418     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7419     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7420     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7421     //
7422     // map(to: s.f[:22]) map(from: s.p[:33])
7423     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7425     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7426     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7427     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7428     // (*) allocate contiguous space needed to fit all mapped members even if
7429     //     we allocate space for members not mapped (in this example,
7430     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7431     //     them as well because they fall between &s.f[0] and &s.p)
7432     //
7433     // map(from: s.f[:22]) map(to: ps->p[:33])
7434     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7436     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7437     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7438     // (*) the struct this entry pertains to is the 2nd element in the list of
7439     //     arguments, hence MEMBER_OF(2)
7440     //
7441     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7442     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7443     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7444     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7446     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7447     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7448     // (*) the struct this entry pertains to is the 4th element in the list
7449     //     of arguments, hence MEMBER_OF(4)
7450 
7451     // Track if the map information being generated is the first for a capture.
7452     bool IsCaptureFirstInfo = IsFirstComponentList;
7453     // When the variable is on a declare target link or in a to clause with
7454     // unified memory, a reference is needed to hold the host/device address
7455     // of the variable.
7456     bool RequiresReference = false;
7457 
7458     // Scan the components from the base to the complete expression.
7459     auto CI = Components.rbegin();
7460     auto CE = Components.rend();
7461     auto I = CI;
7462 
7463     // Track if the map information being generated is the first for a list of
7464     // components.
7465     bool IsExpressionFirstInfo = true;
7466     bool FirstPointerInComplexData = false;
7467     Address BP = Address::invalid();
7468     const Expr *AssocExpr = I->getAssociatedExpression();
7469     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7470     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7471     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7472 
7473     if (isa<MemberExpr>(AssocExpr)) {
7474       // The base is the 'this' pointer. The content of the pointer is going
7475       // to be the base of the field being mapped.
7476       BP = CGF.LoadCXXThisAddress();
7477     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7478                (OASE &&
7479                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7480       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7481     } else if (OAShE &&
7482                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7483       BP = Address(
7484           CGF.EmitScalarExpr(OAShE->getBase()),
7485           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7486     } else {
7487       // The base is the reference to the variable.
7488       // BP = &Var.
7489       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7490       if (const auto *VD =
7491               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7492         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7493                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7494           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7495               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7496                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7497             RequiresReference = true;
7498             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7499           }
7500         }
7501       }
7502 
7503       // If the variable is a pointer and is being dereferenced (i.e. is not
7504       // the last component), the base has to be the pointer itself, not its
7505       // reference. References are ignored for mapping purposes.
7506       QualType Ty =
7507           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7508       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7509         // No need to generate individual map information for the pointer, it
7510         // can be associated with the combined storage if shared memory mode is
7511         // active or the base declaration is not global variable.
7512         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7513         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7514             !VD || VD->hasLocalStorage())
7515           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7516         else
7517           FirstPointerInComplexData = true;
7518         ++I;
7519       }
7520     }
7521 
7522     // Track whether a component of the list should be marked as MEMBER_OF some
7523     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7524     // in a component list should be marked as MEMBER_OF, all subsequent entries
7525     // do not belong to the base struct. E.g.
7526     // struct S2 s;
7527     // s.ps->ps->ps->f[:]
7528     //   (1) (2) (3) (4)
7529     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7530     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7531     // is the pointee of ps(2) which is not member of struct s, so it should not
7532     // be marked as such (it is still PTR_AND_OBJ).
7533     // The variable is initialized to false so that PTR_AND_OBJ entries which
7534     // are not struct members are not considered (e.g. array of pointers to
7535     // data).
7536     bool ShouldBeMemberOf = false;
7537 
7538     // Variable keeping track of whether or not we have encountered a component
7539     // in the component list which is a member expression. Useful when we have a
7540     // pointer or a final array section, in which case it is the previous
7541     // component in the list which tells us whether we have a member expression.
7542     // E.g. X.f[:]
7543     // While processing the final array section "[:]" it is "f" which tells us
7544     // whether we are dealing with a member of a declared struct.
7545     const MemberExpr *EncounteredME = nullptr;
7546 
7547     for (; I != CE; ++I) {
7548       // If the current component is member of a struct (parent struct) mark it.
7549       if (!EncounteredME) {
7550         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7551         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7552         // as MEMBER_OF the parent struct.
7553         if (EncounteredME) {
7554           ShouldBeMemberOf = true;
7555           // Do not emit as complex pointer if this is actually not array-like
7556           // expression.
7557           if (FirstPointerInComplexData) {
7558             QualType Ty = std::prev(I)
7559                               ->getAssociatedDeclaration()
7560                               ->getType()
7561                               .getNonReferenceType();
7562             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7563             FirstPointerInComplexData = false;
7564           }
7565         }
7566       }
7567 
7568       auto Next = std::next(I);
7569 
7570       // We need to generate the addresses and sizes if this is the last
7571       // component, if the component is a pointer or if it is an array section
7572       // whose length can't be proved to be one. If this is a pointer, it
7573       // becomes the base address for the following components.
7574 
7575       // A final array section, is one whose length can't be proved to be one.
7576       bool IsFinalArraySection =
7577           isFinalArraySectionExpression(I->getAssociatedExpression());
7578 
7579       // Get information on whether the element is a pointer. Have to do a
7580       // special treatment for array sections given that they are built-in
7581       // types.
7582       const auto *OASE =
7583           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7584       const auto *OAShE =
7585           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7586       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7587       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7588       bool IsPointer =
7589           OAShE ||
7590           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7591                        .getCanonicalType()
7592                        ->isAnyPointerType()) ||
7593           I->getAssociatedExpression()->getType()->isAnyPointerType();
7594       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7595 
7596       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7597         // If this is not the last component, we expect the pointer to be
7598         // associated with an array expression or member expression.
7599         assert((Next == CE ||
7600                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7601                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7602                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7603                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7604                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7605                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7606                "Unexpected expression");
7607 
7608         Address LB = Address::invalid();
7609         if (OAShE) {
7610           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7611                        CGF.getContext().getTypeAlignInChars(
7612                            OAShE->getBase()->getType()));
7613         } else {
7614           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7615                    .getAddress(CGF);
7616         }
7617 
7618         // If this component is a pointer inside the base struct then we don't
7619         // need to create any entry for it - it will be combined with the object
7620         // it is pointing to into a single PTR_AND_OBJ entry.
7621         bool IsMemberPointerOrAddr =
7622             (IsPointer || ForDeviceAddr) && EncounteredME &&
7623             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7624              EncounteredME);
7625         if (!OverlappedElements.empty()) {
7626           // Handle base element with the info for overlapped elements.
7627           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7628           assert(Next == CE &&
7629                  "Expected last element for the overlapped elements.");
7630           assert(!IsPointer &&
7631                  "Unexpected base element with the pointer type.");
7632           // Mark the whole struct as the struct that requires allocation on the
7633           // device.
7634           PartialStruct.LowestElem = {0, LB};
7635           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7636               I->getAssociatedExpression()->getType());
7637           Address HB = CGF.Builder.CreateConstGEP(
7638               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7639                                                               CGF.VoidPtrTy),
7640               TypeSize.getQuantity() - 1);
7641           PartialStruct.HighestElem = {
7642               std::numeric_limits<decltype(
7643                   PartialStruct.HighestElem.first)>::max(),
7644               HB};
7645           PartialStruct.Base = BP;
7646           // Emit data for non-overlapped data.
7647           OpenMPOffloadMappingFlags Flags =
7648               OMP_MAP_MEMBER_OF |
7649               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7650                              /*AddPtrFlag=*/false,
7651                              /*AddIsTargetParamFlag=*/false);
7652           LB = BP;
7653           llvm::Value *Size = nullptr;
7654           // Do bitcopy of all non-overlapped structure elements.
7655           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7656                    Component : OverlappedElements) {
7657             Address ComponentLB = Address::invalid();
7658             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7659                  Component) {
7660               if (MC.getAssociatedDeclaration()) {
7661                 ComponentLB =
7662                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7663                         .getAddress(CGF);
7664                 Size = CGF.Builder.CreatePtrDiff(
7665                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7666                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7667                 break;
7668               }
7669             }
7670             CombinedInfo.BasePointers.push_back(BP.getPointer());
7671             CombinedInfo.Pointers.push_back(LB.getPointer());
7672             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7673                 Size, CGF.Int64Ty, /*isSigned=*/true));
7674             CombinedInfo.Types.push_back(Flags);
7675             CombinedInfo.Mappers.push_back(nullptr);
7676             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7677           }
7678           CombinedInfo.BasePointers.push_back(BP.getPointer());
7679           CombinedInfo.Pointers.push_back(LB.getPointer());
7680           Size = CGF.Builder.CreatePtrDiff(
7681               CGF.EmitCastToVoidPtr(
7682                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7683               CGF.EmitCastToVoidPtr(LB.getPointer()));
7684           CombinedInfo.Sizes.push_back(
7685               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7686           CombinedInfo.Types.push_back(Flags);
7687           CombinedInfo.Mappers.push_back(nullptr);
7688           break;
7689         }
7690         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7691         if (!IsMemberPointerOrAddr) {
7692           CombinedInfo.BasePointers.push_back(BP.getPointer());
7693           CombinedInfo.Pointers.push_back(LB.getPointer());
7694           CombinedInfo.Sizes.push_back(
7695               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7696 
7697           // If Mapper is valid, the last component inherits the mapper.
7698           bool HasMapper = Mapper && Next == CE;
7699           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7700 
7701           // We need to add a pointer flag for each map that comes from the
7702           // same expression except for the first one. We also need to signal
7703           // this map is the first one that relates with the current capture
7704           // (there is a set of entries for each capture).
7705           OpenMPOffloadMappingFlags Flags =
7706               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7707                              !IsExpressionFirstInfo || RequiresReference ||
7708                                  FirstPointerInComplexData,
7709                              IsCaptureFirstInfo && !RequiresReference);
7710 
7711           if (!IsExpressionFirstInfo) {
7712             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7713             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7714             if (IsPointer)
7715               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7716                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7717 
7718             if (ShouldBeMemberOf) {
7719               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7720               // should be later updated with the correct value of MEMBER_OF.
7721               Flags |= OMP_MAP_MEMBER_OF;
7722               // From now on, all subsequent PTR_AND_OBJ entries should not be
7723               // marked as MEMBER_OF.
7724               ShouldBeMemberOf = false;
7725             }
7726           }
7727 
7728           CombinedInfo.Types.push_back(Flags);
7729         }
7730 
7731         // If we have encountered a member expression so far, keep track of the
7732         // mapped member. If the parent is "*this", then the value declaration
7733         // is nullptr.
7734         if (EncounteredME) {
7735           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7736           unsigned FieldIndex = FD->getFieldIndex();
7737 
7738           // Update info about the lowest and highest elements for this struct
7739           if (!PartialStruct.Base.isValid()) {
7740             PartialStruct.LowestElem = {FieldIndex, LB};
7741             if (IsFinalArraySection) {
7742               Address HB =
7743                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7744                       .getAddress(CGF);
7745               PartialStruct.HighestElem = {FieldIndex, HB};
7746             } else {
7747               PartialStruct.HighestElem = {FieldIndex, LB};
7748             }
7749             PartialStruct.Base = BP;
7750           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7751             PartialStruct.LowestElem = {FieldIndex, LB};
7752           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7753             PartialStruct.HighestElem = {FieldIndex, LB};
7754           }
7755         }
7756 
7757         // If we have a final array section, we are done with this expression.
7758         if (IsFinalArraySection)
7759           break;
7760 
7761         // The pointer becomes the base for the next element.
7762         if (Next != CE)
7763           BP = LB;
7764 
7765         IsExpressionFirstInfo = false;
7766         IsCaptureFirstInfo = false;
7767         FirstPointerInComplexData = false;
7768       }
7769     }
7770   }
7771 
7772   /// Return the adjusted map modifiers if the declaration a capture refers to
7773   /// appears in a first-private clause. This is expected to be used only with
7774   /// directives that start with 'target'.
7775   MappableExprsHandler::OpenMPOffloadMappingFlags
7776   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7777     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7778 
7779     // A first private variable captured by reference will use only the
7780     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7781     // declaration is known as first-private in this handler.
7782     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7783       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7784           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7785         return MappableExprsHandler::OMP_MAP_ALWAYS |
7786                MappableExprsHandler::OMP_MAP_TO;
7787       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7788         return MappableExprsHandler::OMP_MAP_TO |
7789                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7790       return MappableExprsHandler::OMP_MAP_PRIVATE |
7791              MappableExprsHandler::OMP_MAP_TO;
7792     }
7793     return MappableExprsHandler::OMP_MAP_TO |
7794            MappableExprsHandler::OMP_MAP_FROM;
7795   }
7796 
7797   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7798     // Rotate by getFlagMemberOffset() bits.
7799     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7800                                                   << getFlagMemberOffset());
7801   }
7802 
7803   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7804                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7805     // If the entry is PTR_AND_OBJ but has not been marked with the special
7806     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7807     // marked as MEMBER_OF.
7808     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7809         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7810       return;
7811 
7812     // Reset the placeholder value to prepare the flag for the assignment of the
7813     // proper MEMBER_OF value.
7814     Flags &= ~OMP_MAP_MEMBER_OF;
7815     Flags |= MemberOfFlag;
7816   }
7817 
7818   void getPlainLayout(const CXXRecordDecl *RD,
7819                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7820                       bool AsBase) const {
7821     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7822 
7823     llvm::StructType *St =
7824         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7825 
7826     unsigned NumElements = St->getNumElements();
7827     llvm::SmallVector<
7828         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7829         RecordLayout(NumElements);
7830 
7831     // Fill bases.
7832     for (const auto &I : RD->bases()) {
7833       if (I.isVirtual())
7834         continue;
7835       const auto *Base = I.getType()->getAsCXXRecordDecl();
7836       // Ignore empty bases.
7837       if (Base->isEmpty() || CGF.getContext()
7838                                  .getASTRecordLayout(Base)
7839                                  .getNonVirtualSize()
7840                                  .isZero())
7841         continue;
7842 
7843       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7844       RecordLayout[FieldIndex] = Base;
7845     }
7846     // Fill in virtual bases.
7847     for (const auto &I : RD->vbases()) {
7848       const auto *Base = I.getType()->getAsCXXRecordDecl();
7849       // Ignore empty bases.
7850       if (Base->isEmpty())
7851         continue;
7852       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7853       if (RecordLayout[FieldIndex])
7854         continue;
7855       RecordLayout[FieldIndex] = Base;
7856     }
7857     // Fill in all the fields.
7858     assert(!RD->isUnion() && "Unexpected union.");
7859     for (const auto *Field : RD->fields()) {
7860       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7861       // will fill in later.)
7862       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7863         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7864         RecordLayout[FieldIndex] = Field;
7865       }
7866     }
7867     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7868              &Data : RecordLayout) {
7869       if (Data.isNull())
7870         continue;
7871       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7872         getPlainLayout(Base, Layout, /*AsBase=*/true);
7873       else
7874         Layout.push_back(Data.get<const FieldDecl *>());
7875     }
7876   }
7877 
7878 public:
7879   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7880       : CurDir(&Dir), CGF(CGF) {
7881     // Extract firstprivate clause information.
7882     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7883       for (const auto *D : C->varlists())
7884         FirstPrivateDecls.try_emplace(
7885             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7886     // Extract implicit firstprivates from uses_allocators clauses.
7887     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7888       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7889         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7890         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7891           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7892                                         /*Implicit=*/true);
7893         else if (const auto *VD = dyn_cast<VarDecl>(
7894                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7895                          ->getDecl()))
7896           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7897       }
7898     }
7899     // Extract device pointer clause information.
7900     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7901       for (auto L : C->component_lists())
7902         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
7903   }
7904 
7905   /// Constructor for the declare mapper directive.
7906   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7907       : CurDir(&Dir), CGF(CGF) {}
7908 
7909   /// Generate code for the combined entry if we have a partially mapped struct
7910   /// and take care of the mapping flags of the arguments corresponding to
7911   /// individual struct members.
7912   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
7913                          MapFlagsArrayTy &CurTypes,
7914                          const StructRangeInfoTy &PartialStruct) const {
7915     // Base is the base of the struct
7916     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
7917     // Pointer is the address of the lowest element
7918     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7919     CombinedInfo.Pointers.push_back(LB);
7920     // There should not be a mapper for a combined entry.
7921     CombinedInfo.Mappers.push_back(nullptr);
7922     // Size is (addr of {highest+1} element) - (addr of lowest element)
7923     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7924     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7925     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7926     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7927     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7928     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7929                                                   /*isSigned=*/false);
7930     CombinedInfo.Sizes.push_back(Size);
7931     // Map type is always TARGET_PARAM
7932     CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
7933     // If any element has the present modifier, then make sure the runtime
7934     // doesn't attempt to allocate the struct.
7935     if (CurTypes.end() !=
7936         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
7937           return Type & OMP_MAP_PRESENT;
7938         }))
7939       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
7940     // Remove TARGET_PARAM flag from the first element
7941     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7942 
7943     // All other current entries will be MEMBER_OF the combined entry
7944     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7945     // 0xFFFF in the MEMBER_OF field).
7946     OpenMPOffloadMappingFlags MemberOfFlag =
7947         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
7948     for (auto &M : CurTypes)
7949       setCorrectMemberOfFlag(M, MemberOfFlag);
7950   }
7951 
7952   /// Generate all the base pointers, section pointers, sizes, map types, and
7953   /// mappers for the extracted mappable expressions (all included in \a
7954   /// CombinedInfo). Also, for each item that relates with a device pointer, a
7955   /// pair of the relevant declaration and index where it occurs is appended to
7956   /// the device pointers info array.
7957   void generateAllInfo(
7958       MapCombinedInfoTy &CombinedInfo,
7959       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7960           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7961     // We have to process the component lists that relate with the same
7962     // declaration in a single chunk so that we can generate the map flags
7963     // correctly. Therefore, we organize all lists in a map.
7964     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7965 
7966     // Helper function to fill the information map for the different supported
7967     // clauses.
7968     auto &&InfoGen =
7969         [&Info, &SkipVarSet](
7970             const ValueDecl *D,
7971             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7972             OpenMPMapClauseKind MapType,
7973             ArrayRef<OpenMPMapModifierKind> MapModifiers,
7974             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7975             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7976             bool ForDeviceAddr = false) {
7977           const ValueDecl *VD =
7978               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7979           if (SkipVarSet.count(VD))
7980             return;
7981           Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
7982                                 ReturnDevicePointer, IsImplicit, Mapper,
7983                                 ForDeviceAddr);
7984         };
7985 
7986     assert(CurDir.is<const OMPExecutableDirective *>() &&
7987            "Expect a executable directive");
7988     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7989     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7990       for (const auto L : C->component_lists()) {
7991         InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
7992                 C->getMapTypeModifiers(), llvm::None,
7993                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
7994       }
7995     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7996       for (const auto L : C->component_lists()) {
7997         InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
7998                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7999                 C->isImplicit(), std::get<2>(L));
8000       }
8001     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
8002       for (const auto L : C->component_lists()) {
8003         InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
8004                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8005                 C->isImplicit(), std::get<2>(L));
8006       }
8007 
8008     // Look at the use_device_ptr clause information and mark the existing map
8009     // entries as such. If there is no map information for an entry in the
8010     // use_device_ptr list, we create one with map type 'alloc' and zero size
8011     // section. It is the user fault if that was not mapped before. If there is
8012     // no map information and the pointer is a struct member, then we defer the
8013     // emission of that entry until the whole struct has been processed.
8014     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8015         DeferredInfo;
8016     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8017 
8018     for (const auto *C :
8019          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8020       for (const auto L : C->component_lists()) {
8021         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8022             std::get<1>(L);
8023         assert(!Components.empty() &&
8024                "Not expecting empty list of components!");
8025         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8026         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8027         const Expr *IE = Components.back().getAssociatedExpression();
8028         // If the first component is a member expression, we have to look into
8029         // 'this', which maps to null in the map of map information. Otherwise
8030         // look directly for the information.
8031         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8032 
8033         // We potentially have map information for this declaration already.
8034         // Look for the first set of components that refer to it.
8035         if (It != Info.end()) {
8036           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8037             return MI.Components.back().getAssociatedDeclaration() == VD;
8038           });
8039           // If we found a map entry, signal that the pointer has to be returned
8040           // and move on to the next declaration.
8041           // Exclude cases where the base pointer is mapped as array subscript,
8042           // array section or array shaping. The base address is passed as a
8043           // pointer to base in this case and cannot be used as a base for
8044           // use_device_ptr list item.
8045           if (CI != It->second.end()) {
8046             auto PrevCI = std::next(CI->Components.rbegin());
8047             const auto *VarD = dyn_cast<VarDecl>(VD);
8048             if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8049                 isa<MemberExpr>(IE) ||
8050                 !VD->getType().getNonReferenceType()->isPointerType() ||
8051                 PrevCI == CI->Components.rend() ||
8052                 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8053                 VarD->hasLocalStorage()) {
8054               CI->ReturnDevicePointer = true;
8055               continue;
8056             }
8057           }
8058         }
8059 
8060         // We didn't find any match in our map information - generate a zero
8061         // size array section - if the pointer is a struct member we defer this
8062         // action until the whole struct has been processed.
8063         if (isa<MemberExpr>(IE)) {
8064           // Insert the pointer into Info to be processed by
8065           // generateInfoForComponentList. Because it is a member pointer
8066           // without a pointee, no entry will be generated for it, therefore
8067           // we need to generate one after the whole struct has been processed.
8068           // Nonetheless, generateInfoForComponentList must be called to take
8069           // the pointer into account for the calculation of the range of the
8070           // partial struct.
8071           InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
8072                   /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
8073           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8074         } else {
8075           llvm::Value *Ptr =
8076               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8077           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8078           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8079           UseDevicePtrCombinedInfo.Sizes.push_back(
8080               llvm::Constant::getNullValue(CGF.Int64Ty));
8081           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
8082                                                    OMP_MAP_TARGET_PARAM);
8083           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8084         }
8085       }
8086     }
8087 
8088     // Look at the use_device_addr clause information and mark the existing map
8089     // entries as such. If there is no map information for an entry in the
8090     // use_device_addr list, we create one with map type 'alloc' and zero size
8091     // section. It is the user fault if that was not mapped before. If there is
8092     // no map information and the pointer is a struct member, then we defer the
8093     // emission of that entry until the whole struct has been processed.
8094     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8095     for (const auto *C :
8096          CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8097       for (const auto L : C->component_lists()) {
8098         assert(!std::get<1>(L).empty() &&
8099                "Not expecting empty list of components!");
8100         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8101         if (!Processed.insert(VD).second)
8102           continue;
8103         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8104         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8105         // If the first component is a member expression, we have to look into
8106         // 'this', which maps to null in the map of map information. Otherwise
8107         // look directly for the information.
8108         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8109 
8110         // We potentially have map information for this declaration already.
8111         // Look for the first set of components that refer to it.
8112         if (It != Info.end()) {
8113           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8114             return MI.Components.back().getAssociatedDeclaration() == VD;
8115           });
8116           // If we found a map entry, signal that the pointer has to be returned
8117           // and move on to the next declaration.
8118           if (CI != It->second.end()) {
8119             CI->ReturnDevicePointer = true;
8120             continue;
8121           }
8122         }
8123 
8124         // We didn't find any match in our map information - generate a zero
8125         // size array section - if the pointer is a struct member we defer this
8126         // action until the whole struct has been processed.
8127         if (isa<MemberExpr>(IE)) {
8128           // Insert the pointer into Info to be processed by
8129           // generateInfoForComponentList. Because it is a member pointer
8130           // without a pointee, no entry will be generated for it, therefore
8131           // we need to generate one after the whole struct has been processed.
8132           // Nonetheless, generateInfoForComponentList must be called to take
8133           // the pointer into account for the calculation of the range of the
8134           // partial struct.
8135           InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8136                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8137                   nullptr, /*ForDeviceAddr=*/true);
8138           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8139         } else {
8140           llvm::Value *Ptr;
8141           if (IE->isGLValue())
8142             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8143           else
8144             Ptr = CGF.EmitScalarExpr(IE);
8145           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8146           CombinedInfo.Pointers.push_back(Ptr);
8147           CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8148           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8149           CombinedInfo.Mappers.push_back(nullptr);
8150         }
8151       }
8152     }
8153 
8154     for (const auto &M : Info) {
8155       // We need to know when we generate information for the first component
8156       // associated with a capture, because the mapping flags depend on it.
8157       bool IsFirstComponentList = true;
8158 
8159       // Temporary generated information.
8160       MapCombinedInfoTy CurInfo;
8161       StructRangeInfoTy PartialStruct;
8162 
8163       for (const MapInfo &L : M.second) {
8164         assert(!L.Components.empty() &&
8165                "Not expecting declaration with no component lists.");
8166 
8167         // Remember the current base pointer index.
8168         unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8169         generateInfoForComponentList(L.MapType, L.MapModifiers,
8170                                      L.MotionModifiers, L.Components, CurInfo,
8171                                      PartialStruct, IsFirstComponentList,
8172                                      L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8173 
8174         // If this entry relates with a device pointer, set the relevant
8175         // declaration and add the 'return pointer' flag.
8176         if (L.ReturnDevicePointer) {
8177           assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8178                  "Unexpected number of mapped base pointers.");
8179 
8180           const ValueDecl *RelevantVD =
8181               L.Components.back().getAssociatedDeclaration();
8182           assert(RelevantVD &&
8183                  "No relevant declaration related with device pointer??");
8184 
8185           CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8186               RelevantVD);
8187           CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8188         }
8189         IsFirstComponentList = false;
8190       }
8191 
8192       // Append any pending zero-length pointers which are struct members and
8193       // used with use_device_ptr or use_device_addr.
8194       auto CI = DeferredInfo.find(M.first);
8195       if (CI != DeferredInfo.end()) {
8196         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8197           llvm::Value *BasePtr;
8198           llvm::Value *Ptr;
8199           if (L.ForDeviceAddr) {
8200             if (L.IE->isGLValue())
8201               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8202             else
8203               Ptr = this->CGF.EmitScalarExpr(L.IE);
8204             BasePtr = Ptr;
8205             // Entry is RETURN_PARAM. Also, set the placeholder value
8206             // MEMBER_OF=FFFF so that the entry is later updated with the
8207             // correct value of MEMBER_OF.
8208             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8209           } else {
8210             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8211             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8212                                              L.IE->getExprLoc());
8213             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8214             // value MEMBER_OF=FFFF so that the entry is later updated with the
8215             // correct value of MEMBER_OF.
8216             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8217                                     OMP_MAP_MEMBER_OF);
8218           }
8219           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8220           CurInfo.Pointers.push_back(Ptr);
8221           CurInfo.Sizes.push_back(
8222               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8223           CurInfo.Mappers.push_back(nullptr);
8224         }
8225       }
8226 
8227       // If there is an entry in PartialStruct it means we have a struct with
8228       // individual members mapped. Emit an extra combined entry.
8229       if (PartialStruct.Base.isValid())
8230         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
8231 
8232       // We need to append the results of this capture to what we already have.
8233       CombinedInfo.append(CurInfo);
8234     }
8235     // Append data for use_device_ptr clauses.
8236     CombinedInfo.append(UseDevicePtrCombinedInfo);
8237   }
8238 
8239   /// Generate all the base pointers, section pointers, sizes, map types, and
8240   /// mappers for the extracted map clauses of user-defined mapper (all included
8241   /// in \a CombinedInfo).
8242   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8243     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8244            "Expect a declare mapper directive");
8245     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8246     // We have to process the component lists that relate with the same
8247     // declaration in a single chunk so that we can generate the map flags
8248     // correctly. Therefore, we organize all lists in a map.
8249     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8250 
8251     // Fill the information map for map clauses.
8252     for (const auto *C : CurMapperDir->clauselists()) {
8253       const auto *MC = cast<OMPMapClause>(C);
8254       for (const auto L : MC->component_lists()) {
8255         const ValueDecl *VD =
8256             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8257                            : nullptr;
8258         // Get the corresponding user-defined mapper.
8259         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8260                               MC->getMapTypeModifiers(), llvm::None,
8261                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8262                               std::get<2>(L));
8263       }
8264     }
8265 
8266     for (const auto &M : Info) {
8267       // We need to know when we generate information for the first component
8268       // associated with a capture, because the mapping flags depend on it.
8269       bool IsFirstComponentList = true;
8270 
8271       // Temporary generated information.
8272       MapCombinedInfoTy CurInfo;
8273       StructRangeInfoTy PartialStruct;
8274 
8275       for (const MapInfo &L : M.second) {
8276         assert(!L.Components.empty() &&
8277                "Not expecting declaration with no component lists.");
8278         generateInfoForComponentList(L.MapType, L.MapModifiers,
8279                                      L.MotionModifiers, L.Components, CurInfo,
8280                                      PartialStruct, IsFirstComponentList,
8281                                      L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8282         IsFirstComponentList = false;
8283       }
8284 
8285       // If there is an entry in PartialStruct it means we have a struct with
8286       // individual members mapped. Emit an extra combined entry.
8287       if (PartialStruct.Base.isValid())
8288         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
8289 
8290       // We need to append the results of this capture to what we already have.
8291       CombinedInfo.append(CurInfo);
8292     }
8293   }
8294 
8295   /// Emit capture info for lambdas for variables captured by reference.
8296   void generateInfoForLambdaCaptures(
8297       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8298       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8299     const auto *RD = VD->getType()
8300                          .getCanonicalType()
8301                          .getNonReferenceType()
8302                          ->getAsCXXRecordDecl();
8303     if (!RD || !RD->isLambda())
8304       return;
8305     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8306     LValue VDLVal = CGF.MakeAddrLValue(
8307         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8308     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8309     FieldDecl *ThisCapture = nullptr;
8310     RD->getCaptureFields(Captures, ThisCapture);
8311     if (ThisCapture) {
8312       LValue ThisLVal =
8313           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8314       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8315       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8316                                  VDLVal.getPointer(CGF));
8317       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8318       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8319       CombinedInfo.Sizes.push_back(
8320           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8321                                     CGF.Int64Ty, /*isSigned=*/true));
8322       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8323                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8324       CombinedInfo.Mappers.push_back(nullptr);
8325     }
8326     for (const LambdaCapture &LC : RD->captures()) {
8327       if (!LC.capturesVariable())
8328         continue;
8329       const VarDecl *VD = LC.getCapturedVar();
8330       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8331         continue;
8332       auto It = Captures.find(VD);
8333       assert(It != Captures.end() && "Found lambda capture without field.");
8334       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8335       if (LC.getCaptureKind() == LCK_ByRef) {
8336         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8337         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8338                                    VDLVal.getPointer(CGF));
8339         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8340         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8341         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8342             CGF.getTypeSize(
8343                 VD->getType().getCanonicalType().getNonReferenceType()),
8344             CGF.Int64Ty, /*isSigned=*/true));
8345       } else {
8346         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8347         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8348                                    VDLVal.getPointer(CGF));
8349         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8350         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8351         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8352       }
8353       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8354                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8355       CombinedInfo.Mappers.push_back(nullptr);
8356     }
8357   }
8358 
8359   /// Set correct indices for lambdas captures.
8360   void adjustMemberOfForLambdaCaptures(
8361       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8362       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8363       MapFlagsArrayTy &Types) const {
8364     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8365       // Set correct member_of idx for all implicit lambda captures.
8366       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8367                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8368         continue;
8369       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8370       assert(BasePtr && "Unable to find base lambda address.");
8371       int TgtIdx = -1;
8372       for (unsigned J = I; J > 0; --J) {
8373         unsigned Idx = J - 1;
8374         if (Pointers[Idx] != BasePtr)
8375           continue;
8376         TgtIdx = Idx;
8377         break;
8378       }
8379       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8380       // All other current entries will be MEMBER_OF the combined entry
8381       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8382       // 0xFFFF in the MEMBER_OF field).
8383       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8384       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8385     }
8386   }
8387 
8388   /// Generate the base pointers, section pointers, sizes, map types, and
8389   /// mappers associated to a given capture (all included in \a CombinedInfo).
8390   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8391                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8392                               StructRangeInfoTy &PartialStruct) const {
8393     assert(!Cap->capturesVariableArrayType() &&
8394            "Not expecting to generate map info for a variable array type!");
8395 
8396     // We need to know when we generating information for the first component
8397     const ValueDecl *VD = Cap->capturesThis()
8398                               ? nullptr
8399                               : Cap->getCapturedVar()->getCanonicalDecl();
8400 
8401     // If this declaration appears in a is_device_ptr clause we just have to
8402     // pass the pointer by value. If it is a reference to a declaration, we just
8403     // pass its value.
8404     if (DevPointersMap.count(VD)) {
8405       CombinedInfo.BasePointers.emplace_back(Arg, VD);
8406       CombinedInfo.Pointers.push_back(Arg);
8407       CombinedInfo.Sizes.push_back(
8408           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8409                                     CGF.Int64Ty, /*isSigned=*/true));
8410       CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8411       CombinedInfo.Mappers.push_back(nullptr);
8412       return;
8413     }
8414 
8415     using MapData =
8416         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8417                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8418                    const ValueDecl *>;
8419     SmallVector<MapData, 4> DeclComponentLists;
8420     assert(CurDir.is<const OMPExecutableDirective *>() &&
8421            "Expect a executable directive");
8422     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8423     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8424       for (const auto L : C->decl_component_lists(VD)) {
8425         const ValueDecl *VDecl, *Mapper;
8426         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8427         std::tie(VDecl, Components, Mapper) = L;
8428         assert(VDecl == VD && "We got information for the wrong declaration??");
8429         assert(!Components.empty() &&
8430                "Not expecting declaration with no component lists.");
8431         DeclComponentLists.emplace_back(Components, C->getMapType(),
8432                                         C->getMapTypeModifiers(),
8433                                         C->isImplicit(), Mapper);
8434       }
8435     }
8436 
8437     // Find overlapping elements (including the offset from the base element).
8438     llvm::SmallDenseMap<
8439         const MapData *,
8440         llvm::SmallVector<
8441             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8442         4>
8443         OverlappedData;
8444     size_t Count = 0;
8445     for (const MapData &L : DeclComponentLists) {
8446       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8447       OpenMPMapClauseKind MapType;
8448       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8449       bool IsImplicit;
8450       const ValueDecl *Mapper;
8451       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8452       ++Count;
8453       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8454         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8455         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
8456         auto CI = Components.rbegin();
8457         auto CE = Components.rend();
8458         auto SI = Components1.rbegin();
8459         auto SE = Components1.rend();
8460         for (; CI != CE && SI != SE; ++CI, ++SI) {
8461           if (CI->getAssociatedExpression()->getStmtClass() !=
8462               SI->getAssociatedExpression()->getStmtClass())
8463             break;
8464           // Are we dealing with different variables/fields?
8465           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8466             break;
8467         }
8468         // Found overlapping if, at least for one component, reached the head of
8469         // the components list.
8470         if (CI == CE || SI == SE) {
8471           assert((CI != CE || SI != SE) &&
8472                  "Unexpected full match of the mapping components.");
8473           const MapData &BaseData = CI == CE ? L : L1;
8474           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8475               SI == SE ? Components : Components1;
8476           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8477           OverlappedElements.getSecond().push_back(SubData);
8478         }
8479       }
8480     }
8481     // Sort the overlapped elements for each item.
8482     llvm::SmallVector<const FieldDecl *, 4> Layout;
8483     if (!OverlappedData.empty()) {
8484       if (const auto *CRD =
8485               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8486         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8487       else {
8488         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8489         Layout.append(RD->field_begin(), RD->field_end());
8490       }
8491     }
8492     for (auto &Pair : OverlappedData) {
8493       llvm::sort(
8494           Pair.getSecond(),
8495           [&Layout](
8496               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8497               OMPClauseMappableExprCommon::MappableExprComponentListRef
8498                   Second) {
8499             auto CI = First.rbegin();
8500             auto CE = First.rend();
8501             auto SI = Second.rbegin();
8502             auto SE = Second.rend();
8503             for (; CI != CE && SI != SE; ++CI, ++SI) {
8504               if (CI->getAssociatedExpression()->getStmtClass() !=
8505                   SI->getAssociatedExpression()->getStmtClass())
8506                 break;
8507               // Are we dealing with different variables/fields?
8508               if (CI->getAssociatedDeclaration() !=
8509                   SI->getAssociatedDeclaration())
8510                 break;
8511             }
8512 
8513             // Lists contain the same elements.
8514             if (CI == CE && SI == SE)
8515               return false;
8516 
8517             // List with less elements is less than list with more elements.
8518             if (CI == CE || SI == SE)
8519               return CI == CE;
8520 
8521             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8522             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8523             if (FD1->getParent() == FD2->getParent())
8524               return FD1->getFieldIndex() < FD2->getFieldIndex();
8525             const auto It =
8526                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8527                   return FD == FD1 || FD == FD2;
8528                 });
8529             return *It == FD1;
8530           });
8531     }
8532 
8533     // Associated with a capture, because the mapping flags depend on it.
8534     // Go through all of the elements with the overlapped elements.
8535     for (const auto &Pair : OverlappedData) {
8536       const MapData &L = *Pair.getFirst();
8537       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8538       OpenMPMapClauseKind MapType;
8539       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8540       bool IsImplicit;
8541       const ValueDecl *Mapper;
8542       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8543       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8544           OverlappedComponents = Pair.getSecond();
8545       bool IsFirstComponentList = true;
8546       generateInfoForComponentList(
8547           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
8548           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8549           /*ForDeviceAddr=*/false, OverlappedComponents);
8550     }
8551     // Go through other elements without overlapped elements.
8552     bool IsFirstComponentList = OverlappedData.empty();
8553     for (const MapData &L : DeclComponentLists) {
8554       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8555       OpenMPMapClauseKind MapType;
8556       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8557       bool IsImplicit;
8558       const ValueDecl *Mapper;
8559       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8560       auto It = OverlappedData.find(&L);
8561       if (It == OverlappedData.end())
8562         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
8563                                      Components, CombinedInfo, PartialStruct,
8564                                      IsFirstComponentList, IsImplicit, Mapper);
8565       IsFirstComponentList = false;
8566     }
8567   }
8568 
8569   /// Generate the default map information for a given capture \a CI,
8570   /// record field declaration \a RI and captured value \a CV.
8571   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8572                               const FieldDecl &RI, llvm::Value *CV,
8573                               MapCombinedInfoTy &CombinedInfo) const {
8574     bool IsImplicit = true;
8575     // Do the default mapping.
8576     if (CI.capturesThis()) {
8577       CombinedInfo.BasePointers.push_back(CV);
8578       CombinedInfo.Pointers.push_back(CV);
8579       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8580       CombinedInfo.Sizes.push_back(
8581           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8582                                     CGF.Int64Ty, /*isSigned=*/true));
8583       // Default map type.
8584       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8585     } else if (CI.capturesVariableByCopy()) {
8586       CombinedInfo.BasePointers.push_back(CV);
8587       CombinedInfo.Pointers.push_back(CV);
8588       if (!RI.getType()->isAnyPointerType()) {
8589         // We have to signal to the runtime captures passed by value that are
8590         // not pointers.
8591         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
8592         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8593             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8594       } else {
8595         // Pointers are implicitly mapped with a zero size and no flags
8596         // (other than first map that is added for all implicit maps).
8597         CombinedInfo.Types.push_back(OMP_MAP_NONE);
8598         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8599       }
8600       const VarDecl *VD = CI.getCapturedVar();
8601       auto I = FirstPrivateDecls.find(VD);
8602       if (I != FirstPrivateDecls.end())
8603         IsImplicit = I->getSecond();
8604     } else {
8605       assert(CI.capturesVariable() && "Expected captured reference.");
8606       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8607       QualType ElementType = PtrTy->getPointeeType();
8608       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8609           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8610       // The default map type for a scalar/complex type is 'to' because by
8611       // default the value doesn't have to be retrieved. For an aggregate
8612       // type, the default is 'tofrom'.
8613       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8614       const VarDecl *VD = CI.getCapturedVar();
8615       auto I = FirstPrivateDecls.find(VD);
8616       if (I != FirstPrivateDecls.end() &&
8617           VD->getType().isConstant(CGF.getContext())) {
8618         llvm::Constant *Addr =
8619             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8620         // Copy the value of the original variable to the new global copy.
8621         CGF.Builder.CreateMemCpy(
8622             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8623             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8624             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
8625         // Use new global variable as the base pointers.
8626         CombinedInfo.BasePointers.push_back(Addr);
8627         CombinedInfo.Pointers.push_back(Addr);
8628       } else {
8629         CombinedInfo.BasePointers.push_back(CV);
8630         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8631           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8632               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8633               AlignmentSource::Decl));
8634           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8635         } else {
8636           CombinedInfo.Pointers.push_back(CV);
8637         }
8638       }
8639       if (I != FirstPrivateDecls.end())
8640         IsImplicit = I->getSecond();
8641     }
8642     // Every default map produces a single argument which is a target parameter.
8643     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
8644 
8645     // Add flag stating this is an implicit map.
8646     if (IsImplicit)
8647       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
8648 
8649     // No user-defined mapper for default mapping.
8650     CombinedInfo.Mappers.push_back(nullptr);
8651   }
8652 };
8653 } // anonymous namespace
8654 
8655 /// Emit the arrays used to pass the captures and map information to the
8656 /// offloading runtime library. If there is no map or capture information,
8657 /// return nullptr by reference.
8658 static void
8659 emitOffloadingArrays(CodeGenFunction &CGF,
8660                      MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8661                      CGOpenMPRuntime::TargetDataInfo &Info) {
8662   CodeGenModule &CGM = CGF.CGM;
8663   ASTContext &Ctx = CGF.getContext();
8664 
8665   // Reset the array information.
8666   Info.clearArrayInfo();
8667   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8668 
8669   if (Info.NumberOfPtrs) {
8670     // Detect if we have any capture size requiring runtime evaluation of the
8671     // size so that a constant array could be eventually used.
8672     bool hasRuntimeEvaluationCaptureSize = false;
8673     for (llvm::Value *S : CombinedInfo.Sizes)
8674       if (!isa<llvm::Constant>(S)) {
8675         hasRuntimeEvaluationCaptureSize = true;
8676         break;
8677       }
8678 
8679     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8680     QualType PointerArrayType = Ctx.getConstantArrayType(
8681         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8682         /*IndexTypeQuals=*/0);
8683 
8684     Info.BasePointersArray =
8685         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8686     Info.PointersArray =
8687         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8688     Address MappersArray =
8689         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
8690     Info.MappersArray = MappersArray.getPointer();
8691 
8692     // If we don't have any VLA types or other types that require runtime
8693     // evaluation, we can use a constant array for the map sizes, otherwise we
8694     // need to fill up the arrays as we do for the pointers.
8695     QualType Int64Ty =
8696         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8697     if (hasRuntimeEvaluationCaptureSize) {
8698       QualType SizeArrayType = Ctx.getConstantArrayType(
8699           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8700           /*IndexTypeQuals=*/0);
8701       Info.SizesArray =
8702           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8703     } else {
8704       // We expect all the sizes to be constant, so we collect them to create
8705       // a constant array.
8706       SmallVector<llvm::Constant *, 16> ConstSizes;
8707       for (llvm::Value *S : CombinedInfo.Sizes)
8708         ConstSizes.push_back(cast<llvm::Constant>(S));
8709 
8710       auto *SizesArrayInit = llvm::ConstantArray::get(
8711           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8712       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8713       auto *SizesArrayGbl = new llvm::GlobalVariable(
8714           CGM.getModule(), SizesArrayInit->getType(),
8715           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8716           SizesArrayInit, Name);
8717       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8718       Info.SizesArray = SizesArrayGbl;
8719     }
8720 
8721     // The map types are always constant so we don't need to generate code to
8722     // fill arrays. Instead, we create an array constant.
8723     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
8724     llvm::copy(CombinedInfo.Types, Mapping.begin());
8725     llvm::Constant *MapTypesArrayInit =
8726         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8727     std::string MaptypesName =
8728         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8729     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8730         CGM.getModule(), MapTypesArrayInit->getType(),
8731         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8732         MapTypesArrayInit, MaptypesName);
8733     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8734     Info.MapTypesArray = MapTypesArrayGbl;
8735 
8736     // If there's a present map type modifier, it must not be applied to the end
8737     // of a region, so generate a separate map type array in that case.
8738     if (Info.separateBeginEndCalls()) {
8739       bool EndMapTypesDiffer = false;
8740       for (uint64_t &Type : Mapping) {
8741         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
8742           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
8743           EndMapTypesDiffer = true;
8744         }
8745       }
8746       if (EndMapTypesDiffer) {
8747         MapTypesArrayInit =
8748             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8749         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8750         MapTypesArrayGbl = new llvm::GlobalVariable(
8751             CGM.getModule(), MapTypesArrayInit->getType(),
8752             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8753             MapTypesArrayInit, MaptypesName);
8754         MapTypesArrayGbl->setUnnamedAddr(
8755             llvm::GlobalValue::UnnamedAddr::Global);
8756         Info.MapTypesArrayEnd = MapTypesArrayGbl;
8757       }
8758     }
8759 
8760     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8761       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
8762       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8763           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8764           Info.BasePointersArray, 0, I);
8765       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8766           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8767       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8768       CGF.Builder.CreateStore(BPVal, BPAddr);
8769 
8770       if (Info.requiresDevicePointerInfo())
8771         if (const ValueDecl *DevVD =
8772                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
8773           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8774 
8775       llvm::Value *PVal = CombinedInfo.Pointers[I];
8776       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8777           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8778           Info.PointersArray, 0, I);
8779       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8780           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8781       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8782       CGF.Builder.CreateStore(PVal, PAddr);
8783 
8784       if (hasRuntimeEvaluationCaptureSize) {
8785         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8786             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8787             Info.SizesArray,
8788             /*Idx0=*/0,
8789             /*Idx1=*/I);
8790         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8791         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
8792                                                           CGM.Int64Ty,
8793                                                           /*isSigned=*/true),
8794                                 SAddr);
8795       }
8796 
8797       // Fill up the mapper array.
8798       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
8799       if (CombinedInfo.Mappers[I]) {
8800         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8801             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8802         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
8803         Info.HasMapper = true;
8804       }
8805       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
8806       CGF.Builder.CreateStore(MFunc, MAddr);
8807     }
8808   }
8809 }
8810 
8811 /// Emit the arguments to be passed to the runtime library based on the
8812 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
8813 /// ForEndCall, emit map types to be passed for the end of the region instead of
8814 /// the beginning.
8815 static void emitOffloadingArraysArgument(
8816     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8817     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8818     llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
8819     CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) {
8820   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
8821          "expected region end call to runtime only when end call is separate");
8822   CodeGenModule &CGM = CGF.CGM;
8823   if (Info.NumberOfPtrs) {
8824     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8825         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8826         Info.BasePointersArray,
8827         /*Idx0=*/0, /*Idx1=*/0);
8828     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8829         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8830         Info.PointersArray,
8831         /*Idx0=*/0,
8832         /*Idx1=*/0);
8833     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8834         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8835         /*Idx0=*/0, /*Idx1=*/0);
8836     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8837         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8838         ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
8839                                             : Info.MapTypesArray,
8840         /*Idx0=*/0,
8841         /*Idx1=*/0);
8842     MappersArrayArg =
8843         Info.HasMapper
8844             ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy)
8845             : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8846   } else {
8847     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8848     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8849     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8850     MapTypesArrayArg =
8851         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8852     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8853   }
8854 }
8855 
8856 /// Check for inner distribute directive.
8857 static const OMPExecutableDirective *
8858 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8859   const auto *CS = D.getInnermostCapturedStmt();
8860   const auto *Body =
8861       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8862   const Stmt *ChildStmt =
8863       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8864 
8865   if (const auto *NestedDir =
8866           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8867     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8868     switch (D.getDirectiveKind()) {
8869     case OMPD_target:
8870       if (isOpenMPDistributeDirective(DKind))
8871         return NestedDir;
8872       if (DKind == OMPD_teams) {
8873         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8874             /*IgnoreCaptured=*/true);
8875         if (!Body)
8876           return nullptr;
8877         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8878         if (const auto *NND =
8879                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8880           DKind = NND->getDirectiveKind();
8881           if (isOpenMPDistributeDirective(DKind))
8882             return NND;
8883         }
8884       }
8885       return nullptr;
8886     case OMPD_target_teams:
8887       if (isOpenMPDistributeDirective(DKind))
8888         return NestedDir;
8889       return nullptr;
8890     case OMPD_target_parallel:
8891     case OMPD_target_simd:
8892     case OMPD_target_parallel_for:
8893     case OMPD_target_parallel_for_simd:
8894       return nullptr;
8895     case OMPD_target_teams_distribute:
8896     case OMPD_target_teams_distribute_simd:
8897     case OMPD_target_teams_distribute_parallel_for:
8898     case OMPD_target_teams_distribute_parallel_for_simd:
8899     case OMPD_parallel:
8900     case OMPD_for:
8901     case OMPD_parallel_for:
8902     case OMPD_parallel_master:
8903     case OMPD_parallel_sections:
8904     case OMPD_for_simd:
8905     case OMPD_parallel_for_simd:
8906     case OMPD_cancel:
8907     case OMPD_cancellation_point:
8908     case OMPD_ordered:
8909     case OMPD_threadprivate:
8910     case OMPD_allocate:
8911     case OMPD_task:
8912     case OMPD_simd:
8913     case OMPD_sections:
8914     case OMPD_section:
8915     case OMPD_single:
8916     case OMPD_master:
8917     case OMPD_critical:
8918     case OMPD_taskyield:
8919     case OMPD_barrier:
8920     case OMPD_taskwait:
8921     case OMPD_taskgroup:
8922     case OMPD_atomic:
8923     case OMPD_flush:
8924     case OMPD_depobj:
8925     case OMPD_scan:
8926     case OMPD_teams:
8927     case OMPD_target_data:
8928     case OMPD_target_exit_data:
8929     case OMPD_target_enter_data:
8930     case OMPD_distribute:
8931     case OMPD_distribute_simd:
8932     case OMPD_distribute_parallel_for:
8933     case OMPD_distribute_parallel_for_simd:
8934     case OMPD_teams_distribute:
8935     case OMPD_teams_distribute_simd:
8936     case OMPD_teams_distribute_parallel_for:
8937     case OMPD_teams_distribute_parallel_for_simd:
8938     case OMPD_target_update:
8939     case OMPD_declare_simd:
8940     case OMPD_declare_variant:
8941     case OMPD_begin_declare_variant:
8942     case OMPD_end_declare_variant:
8943     case OMPD_declare_target:
8944     case OMPD_end_declare_target:
8945     case OMPD_declare_reduction:
8946     case OMPD_declare_mapper:
8947     case OMPD_taskloop:
8948     case OMPD_taskloop_simd:
8949     case OMPD_master_taskloop:
8950     case OMPD_master_taskloop_simd:
8951     case OMPD_parallel_master_taskloop:
8952     case OMPD_parallel_master_taskloop_simd:
8953     case OMPD_requires:
8954     case OMPD_unknown:
8955     default:
8956       llvm_unreachable("Unexpected directive.");
8957     }
8958   }
8959 
8960   return nullptr;
8961 }
8962 
8963 /// Emit the user-defined mapper function. The code generation follows the
8964 /// pattern in the example below.
8965 /// \code
8966 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8967 ///                                           void *base, void *begin,
8968 ///                                           int64_t size, int64_t type) {
8969 ///   // Allocate space for an array section first.
8970 ///   if (size > 1 && !maptype.IsDelete)
8971 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8972 ///                                 size*sizeof(Ty), clearToFrom(type));
8973 ///   // Map members.
8974 ///   for (unsigned i = 0; i < size; i++) {
8975 ///     // For each component specified by this mapper:
8976 ///     for (auto c : all_components) {
8977 ///       if (c.hasMapper())
8978 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8979 ///                       c.arg_type);
8980 ///       else
8981 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8982 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8983 ///     }
8984 ///   }
8985 ///   // Delete the array section.
8986 ///   if (size > 1 && maptype.IsDelete)
8987 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8988 ///                                 size*sizeof(Ty), clearToFrom(type));
8989 /// }
8990 /// \endcode
8991 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8992                                             CodeGenFunction *CGF) {
8993   if (UDMMap.count(D) > 0)
8994     return;
8995   ASTContext &C = CGM.getContext();
8996   QualType Ty = D->getType();
8997   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8998   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8999   auto *MapperVarDecl =
9000       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9001   SourceLocation Loc = D->getLocation();
9002   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9003 
9004   // Prepare mapper function arguments and attributes.
9005   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9006                               C.VoidPtrTy, ImplicitParamDecl::Other);
9007   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9008                             ImplicitParamDecl::Other);
9009   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9010                              C.VoidPtrTy, ImplicitParamDecl::Other);
9011   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9012                             ImplicitParamDecl::Other);
9013   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9014                             ImplicitParamDecl::Other);
9015   FunctionArgList Args;
9016   Args.push_back(&HandleArg);
9017   Args.push_back(&BaseArg);
9018   Args.push_back(&BeginArg);
9019   Args.push_back(&SizeArg);
9020   Args.push_back(&TypeArg);
9021   const CGFunctionInfo &FnInfo =
9022       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9023   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9024   SmallString<64> TyStr;
9025   llvm::raw_svector_ostream Out(TyStr);
9026   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9027   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9028   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9029                                     Name, &CGM.getModule());
9030   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9031   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9032   // Start the mapper function code generation.
9033   CodeGenFunction MapperCGF(CGM);
9034   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9035   // Compute the starting and end addreses of array elements.
9036   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9037       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9038       C.getPointerType(Int64Ty), Loc);
9039   // Convert the size in bytes into the number of array elements.
9040   Size = MapperCGF.Builder.CreateExactUDiv(
9041       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9042   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9043       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9044       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9045   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9046   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9047       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9048       C.getPointerType(Int64Ty), Loc);
9049   // Prepare common arguments for array initiation and deletion.
9050   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9051       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9052       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9053   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9054       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9055       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9056   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9057       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9058       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9059 
9060   // Emit array initiation if this is an array section and \p MapType indicates
9061   // that memory allocation is required.
9062   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9063   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9064                              ElementSize, HeadBB, /*IsInit=*/true);
9065 
9066   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9067 
9068   // Emit the loop header block.
9069   MapperCGF.EmitBlock(HeadBB);
9070   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9071   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9072   // Evaluate whether the initial condition is satisfied.
9073   llvm::Value *IsEmpty =
9074       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9075   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9076   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9077 
9078   // Emit the loop body block.
9079   MapperCGF.EmitBlock(BodyBB);
9080   llvm::BasicBlock *LastBB = BodyBB;
9081   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9082       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9083   PtrPHI->addIncoming(PtrBegin, EntryBB);
9084   Address PtrCurrent =
9085       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9086                           .getAlignment()
9087                           .alignmentOfArrayElement(ElementSize));
9088   // Privatize the declared variable of mapper to be the current array element.
9089   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9090   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9091     return MapperCGF
9092         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9093         .getAddress(MapperCGF);
9094   });
9095   (void)Scope.Privatize();
9096 
9097   // Get map clause information. Fill up the arrays with all mapped variables.
9098   MappableExprsHandler::MapCombinedInfoTy Info;
9099   MappableExprsHandler MEHandler(*D, MapperCGF);
9100   MEHandler.generateAllInfoForMapper(Info);
9101 
9102   // Call the runtime API __tgt_mapper_num_components to get the number of
9103   // pre-existing components.
9104   llvm::Value *OffloadingArgs[] = {Handle};
9105   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9106       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9107                                             OMPRTL___tgt_mapper_num_components),
9108       OffloadingArgs);
9109   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9110       PreviousSize,
9111       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9112 
9113   // Fill up the runtime mapper handle for all components.
9114   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9115     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9116         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9117     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9118         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9119     llvm::Value *CurSizeArg = Info.Sizes[I];
9120 
9121     // Extract the MEMBER_OF field from the map type.
9122     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9123     MapperCGF.EmitBlock(MemberBB);
9124     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9125     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9126         OriMapType,
9127         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9128     llvm::BasicBlock *MemberCombineBB =
9129         MapperCGF.createBasicBlock("omp.member.combine");
9130     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9131     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9132     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9133     // Add the number of pre-existing components to the MEMBER_OF field if it
9134     // is valid.
9135     MapperCGF.EmitBlock(MemberCombineBB);
9136     llvm::Value *CombinedMember =
9137         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9138     // Do nothing if it is not a member of previous components.
9139     MapperCGF.EmitBlock(TypeBB);
9140     llvm::PHINode *MemberMapType =
9141         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9142     MemberMapType->addIncoming(OriMapType, MemberBB);
9143     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9144 
9145     // Combine the map type inherited from user-defined mapper with that
9146     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9147     // bits of the \a MapType, which is the input argument of the mapper
9148     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9149     // bits of MemberMapType.
9150     // [OpenMP 5.0], 1.2.6. map-type decay.
9151     //        | alloc |  to   | from  | tofrom | release | delete
9152     // ----------------------------------------------------------
9153     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9154     // to     | alloc |  to   | alloc |   to   | release | delete
9155     // from   | alloc | alloc | from  |  from  | release | delete
9156     // tofrom | alloc |  to   | from  | tofrom | release | delete
9157     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9158         MapType,
9159         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9160                                    MappableExprsHandler::OMP_MAP_FROM));
9161     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9162     llvm::BasicBlock *AllocElseBB =
9163         MapperCGF.createBasicBlock("omp.type.alloc.else");
9164     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9165     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9166     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9167     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9168     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9169     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9170     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9171     MapperCGF.EmitBlock(AllocBB);
9172     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9173         MemberMapType,
9174         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9175                                      MappableExprsHandler::OMP_MAP_FROM)));
9176     MapperCGF.Builder.CreateBr(EndBB);
9177     MapperCGF.EmitBlock(AllocElseBB);
9178     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9179         LeftToFrom,
9180         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9181     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9182     // In case of to, clear OMP_MAP_FROM.
9183     MapperCGF.EmitBlock(ToBB);
9184     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9185         MemberMapType,
9186         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9187     MapperCGF.Builder.CreateBr(EndBB);
9188     MapperCGF.EmitBlock(ToElseBB);
9189     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9190         LeftToFrom,
9191         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9192     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9193     // In case of from, clear OMP_MAP_TO.
9194     MapperCGF.EmitBlock(FromBB);
9195     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9196         MemberMapType,
9197         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9198     // In case of tofrom, do nothing.
9199     MapperCGF.EmitBlock(EndBB);
9200     LastBB = EndBB;
9201     llvm::PHINode *CurMapType =
9202         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9203     CurMapType->addIncoming(AllocMapType, AllocBB);
9204     CurMapType->addIncoming(ToMapType, ToBB);
9205     CurMapType->addIncoming(FromMapType, FromBB);
9206     CurMapType->addIncoming(MemberMapType, ToElseBB);
9207 
9208     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9209                                      CurSizeArg, CurMapType};
9210     if (Info.Mappers[I]) {
9211       // Call the corresponding mapper function.
9212       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9213           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9214       assert(MapperFunc && "Expect a valid mapper function is available.");
9215       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9216     } else {
9217       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9218       // data structure.
9219       MapperCGF.EmitRuntimeCall(
9220           OMPBuilder.getOrCreateRuntimeFunction(
9221               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9222           OffloadingArgs);
9223     }
9224   }
9225 
9226   // Update the pointer to point to the next element that needs to be mapped,
9227   // and check whether we have mapped all elements.
9228   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9229       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9230   PtrPHI->addIncoming(PtrNext, LastBB);
9231   llvm::Value *IsDone =
9232       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9233   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9234   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9235 
9236   MapperCGF.EmitBlock(ExitBB);
9237   // Emit array deletion if this is an array section and \p MapType indicates
9238   // that deletion is required.
9239   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9240                              ElementSize, DoneBB, /*IsInit=*/false);
9241 
9242   // Emit the function exit block.
9243   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9244   MapperCGF.FinishFunction();
9245   UDMMap.try_emplace(D, Fn);
9246   if (CGF) {
9247     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9248     Decls.second.push_back(D);
9249   }
9250 }
9251 
9252 /// Emit the array initialization or deletion portion for user-defined mapper
9253 /// code generation. First, it evaluates whether an array section is mapped and
9254 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9255 /// true, and \a MapType indicates to not delete this array, array
9256 /// initialization code is generated. If \a IsInit is false, and \a MapType
9257 /// indicates to not this array, array deletion code is generated.
9258 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9259     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9260     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9261     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9262   StringRef Prefix = IsInit ? ".init" : ".del";
9263 
9264   // Evaluate if this is an array section.
9265   llvm::BasicBlock *IsDeleteBB =
9266       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9267   llvm::BasicBlock *BodyBB =
9268       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9269   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9270       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9271   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9272 
9273   // Evaluate if we are going to delete this section.
9274   MapperCGF.EmitBlock(IsDeleteBB);
9275   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9276       MapType,
9277       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9278   llvm::Value *DeleteCond;
9279   if (IsInit) {
9280     DeleteCond = MapperCGF.Builder.CreateIsNull(
9281         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9282   } else {
9283     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9284         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9285   }
9286   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9287 
9288   MapperCGF.EmitBlock(BodyBB);
9289   // Get the array size by multiplying element size and element number (i.e., \p
9290   // Size).
9291   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9292       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9293   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9294   // memory allocation/deletion purpose only.
9295   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9296       MapType,
9297       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9298                                    MappableExprsHandler::OMP_MAP_FROM)));
9299   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9300   // data structure.
9301   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9302   MapperCGF.EmitRuntimeCall(
9303       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9304                                             OMPRTL___tgt_push_mapper_component),
9305       OffloadingArgs);
9306 }
9307 
9308 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9309     const OMPDeclareMapperDecl *D) {
9310   auto I = UDMMap.find(D);
9311   if (I != UDMMap.end())
9312     return I->second;
9313   emitUserDefinedMapper(D);
9314   return UDMMap.lookup(D);
9315 }
9316 
9317 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9318     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9319     llvm::Value *DeviceID,
9320     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9321                                      const OMPLoopDirective &D)>
9322         SizeEmitter) {
9323   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9324   const OMPExecutableDirective *TD = &D;
9325   // Get nested teams distribute kind directive, if any.
9326   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9327     TD = getNestedDistributeDirective(CGM.getContext(), D);
9328   if (!TD)
9329     return;
9330   const auto *LD = cast<OMPLoopDirective>(TD);
9331   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9332                                                      PrePostActionTy &) {
9333     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9334       llvm::Value *Args[] = {DeviceID, NumIterations};
9335       CGF.EmitRuntimeCall(
9336           OMPBuilder.getOrCreateRuntimeFunction(
9337               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9338           Args);
9339     }
9340   };
9341   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9342 }
9343 
/// Emit the host-side code that launches the target region of directive \a D.
///
/// The captured variables of the 'target' captured statement are generated
/// first. When \a OutlinedFnID is non-null the offloading sequence is emitted
/// (guarded by \a IfCond when one is given): the offloading arrays are filled,
/// one of the __tgt_target*_mapper runtime entries is called, and the host
/// version \a OutlinedFn is called if the runtime returns a non-zero value.
/// When \a OutlinedFnID is null only the host call to \a OutlinedFn is
/// emitted. If \a D has a 'depend' clause, the generated code is wrapped in a
/// task-based region through EmitOMPTargetTaskBasedDirective.
///
/// \param Device      Device clause expression paired with its modifier; with
///                    the 'ancestor' modifier the region is executed on the
///                    host.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall to
///                    produce the loop trip count.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause selects the task-based emission paths used below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the values of the captured variables as an inlined region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      // NOTE(review): the captured variables are regenerated here when an
      // outer task is used — presumably so that they are emitted inside the
      // task region; confirm.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait_mapper runtime entry points.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams region: same argument list without the team/thread counts.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value of the runtime call is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collect the map information for every capture, emit the offloading
  // arrays, publish them through InputInfo/MapTypesArray and then run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured record fields and the generated
    // captured values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a capture of 'this' is recorded as
        // a null entry.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurInfo.BasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    // Publish the array arguments for ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, inside a task-based region when required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9635 
9636 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9637                                                     StringRef ParentName) {
9638   if (!S)
9639     return;
9640 
9641   // Codegen OMP target directives that offload compute to the device.
9642   bool RequiresDeviceCodegen =
9643       isa<OMPExecutableDirective>(S) &&
9644       isOpenMPTargetExecutionDirective(
9645           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9646 
9647   if (RequiresDeviceCodegen) {
9648     const auto &E = *cast<OMPExecutableDirective>(S);
9649     unsigned DeviceID;
9650     unsigned FileID;
9651     unsigned Line;
9652     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9653                              FileID, Line);
9654 
9655     // Is this a target region that should not be emitted as an entry point? If
9656     // so just signal we are done with this target region.
9657     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9658                                                             ParentName, Line))
9659       return;
9660 
9661     switch (E.getDirectiveKind()) {
9662     case OMPD_target:
9663       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9664                                                    cast<OMPTargetDirective>(E));
9665       break;
9666     case OMPD_target_parallel:
9667       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9668           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9669       break;
9670     case OMPD_target_teams:
9671       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9672           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9673       break;
9674     case OMPD_target_teams_distribute:
9675       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9676           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9677       break;
9678     case OMPD_target_teams_distribute_simd:
9679       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9680           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9681       break;
9682     case OMPD_target_parallel_for:
9683       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9684           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9685       break;
9686     case OMPD_target_parallel_for_simd:
9687       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9688           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9689       break;
9690     case OMPD_target_simd:
9691       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9692           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9693       break;
9694     case OMPD_target_teams_distribute_parallel_for:
9695       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9696           CGM, ParentName,
9697           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9698       break;
9699     case OMPD_target_teams_distribute_parallel_for_simd:
9700       CodeGenFunction::
9701           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9702               CGM, ParentName,
9703               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9704       break;
9705     case OMPD_parallel:
9706     case OMPD_for:
9707     case OMPD_parallel_for:
9708     case OMPD_parallel_master:
9709     case OMPD_parallel_sections:
9710     case OMPD_for_simd:
9711     case OMPD_parallel_for_simd:
9712     case OMPD_cancel:
9713     case OMPD_cancellation_point:
9714     case OMPD_ordered:
9715     case OMPD_threadprivate:
9716     case OMPD_allocate:
9717     case OMPD_task:
9718     case OMPD_simd:
9719     case OMPD_sections:
9720     case OMPD_section:
9721     case OMPD_single:
9722     case OMPD_master:
9723     case OMPD_critical:
9724     case OMPD_taskyield:
9725     case OMPD_barrier:
9726     case OMPD_taskwait:
9727     case OMPD_taskgroup:
9728     case OMPD_atomic:
9729     case OMPD_flush:
9730     case OMPD_depobj:
9731     case OMPD_scan:
9732     case OMPD_teams:
9733     case OMPD_target_data:
9734     case OMPD_target_exit_data:
9735     case OMPD_target_enter_data:
9736     case OMPD_distribute:
9737     case OMPD_distribute_simd:
9738     case OMPD_distribute_parallel_for:
9739     case OMPD_distribute_parallel_for_simd:
9740     case OMPD_teams_distribute:
9741     case OMPD_teams_distribute_simd:
9742     case OMPD_teams_distribute_parallel_for:
9743     case OMPD_teams_distribute_parallel_for_simd:
9744     case OMPD_target_update:
9745     case OMPD_declare_simd:
9746     case OMPD_declare_variant:
9747     case OMPD_begin_declare_variant:
9748     case OMPD_end_declare_variant:
9749     case OMPD_declare_target:
9750     case OMPD_end_declare_target:
9751     case OMPD_declare_reduction:
9752     case OMPD_declare_mapper:
9753     case OMPD_taskloop:
9754     case OMPD_taskloop_simd:
9755     case OMPD_master_taskloop:
9756     case OMPD_master_taskloop_simd:
9757     case OMPD_parallel_master_taskloop:
9758     case OMPD_parallel_master_taskloop_simd:
9759     case OMPD_requires:
9760     case OMPD_unknown:
9761     default:
9762       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9763     }
9764     return;
9765   }
9766 
9767   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9768     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9769       return;
9770 
9771     scanForTargetRegionsFunctions(
9772         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9773     return;
9774   }
9775 
9776   // If this is a lambda function, look into its body.
9777   if (const auto *L = dyn_cast<LambdaExpr>(S))
9778     S = L->getBody();
9779 
9780   // Keep looking for target regions recursively.
9781   for (const Stmt *II : S->children())
9782     scanForTargetRegionsFunctions(II, ParentName);
9783 }
9784 
9785 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9786   // If emitting code for the host, we do not process FD here. Instead we do
9787   // the normal code generation.
9788   if (!CGM.getLangOpts().OpenMPIsDevice) {
9789     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9790       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9791           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9792       // Do not emit device_type(nohost) functions for the host.
9793       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9794         return true;
9795     }
9796     return false;
9797   }
9798 
9799   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9800   // Try to detect target regions in the function.
9801   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9802     StringRef Name = CGM.getMangledName(GD);
9803     scanForTargetRegionsFunctions(FD->getBody(), Name);
9804     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9805         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9806     // Do not emit device_type(nohost) functions for the host.
9807     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9808       return true;
9809   }
9810 
9811   // Do not to emit function if it is not marked as declare target.
9812   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9813          AlreadyEmittedTargetDecls.count(VD) == 0;
9814 }
9815 
9816 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9817   if (!CGM.getLangOpts().OpenMPIsDevice)
9818     return false;
9819 
9820   // Check if there are Ctors/Dtors in this declaration and look for target
9821   // regions in it. We use the complete variant to produce the kernel name
9822   // mangling.
9823   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9824   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9825     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9826       StringRef ParentName =
9827           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9828       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9829     }
9830     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9831       StringRef ParentName =
9832           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9833       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9834     }
9835   }
9836 
9837   // Do not to emit variable if it is not marked as declare target.
9838   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9839       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9840           cast<VarDecl>(GD.getDecl()));
9841   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9842       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9843        HasRequiresUnifiedSharedMemory)) {
9844     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9845     return true;
9846   }
9847   return false;
9848 }
9849 
9850 llvm::Constant *
9851 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9852                                                 const VarDecl *VD) {
9853   assert(VD->getType().isConstant(CGM.getContext()) &&
9854          "Expected constant variable.");
9855   StringRef VarName;
9856   llvm::Constant *Addr;
9857   llvm::GlobalValue::LinkageTypes Linkage;
9858   QualType Ty = VD->getType();
9859   SmallString<128> Buffer;
9860   {
9861     unsigned DeviceID;
9862     unsigned FileID;
9863     unsigned Line;
9864     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9865                              FileID, Line);
9866     llvm::raw_svector_ostream OS(Buffer);
9867     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9868        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9869     VarName = OS.str();
9870   }
9871   Linkage = llvm::GlobalValue::InternalLinkage;
9872   Addr =
9873       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9874                                   getDefaultFirstprivateAddressSpace());
9875   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9876   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9877   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9878   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9879       VarName, Addr, VarSize,
9880       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9881   return Addr;
9882 }
9883 
9884 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9885                                                    llvm::Constant *Addr) {
9886   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9887       !CGM.getLangOpts().OpenMPIsDevice)
9888     return;
9889   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9890       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9891   if (!Res) {
9892     if (CGM.getLangOpts().OpenMPIsDevice) {
9893       // Register non-target variables being emitted in device code (debug info
9894       // may cause this).
9895       StringRef VarName = CGM.getMangledName(VD);
9896       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9897     }
9898     return;
9899   }
9900   // Register declare target variables.
9901   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9902   StringRef VarName;
9903   CharUnits VarSize;
9904   llvm::GlobalValue::LinkageTypes Linkage;
9905 
9906   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9907       !HasRequiresUnifiedSharedMemory) {
9908     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9909     VarName = CGM.getMangledName(VD);
9910     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9911       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9912       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9913     } else {
9914       VarSize = CharUnits::Zero();
9915     }
9916     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9917     // Temp solution to prevent optimizations of the internal variables.
9918     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9919       std::string RefName = getName({VarName, "ref"});
9920       if (!CGM.GetGlobalValue(RefName)) {
9921         llvm::Constant *AddrRef =
9922             getOrCreateInternalVariable(Addr->getType(), RefName);
9923         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9924         GVAddrRef->setConstant(/*Val=*/true);
9925         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9926         GVAddrRef->setInitializer(Addr);
9927         CGM.addCompilerUsedGlobal(GVAddrRef);
9928       }
9929     }
9930   } else {
9931     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9932             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9933              HasRequiresUnifiedSharedMemory)) &&
9934            "Declare target attribute must link or to with unified memory.");
9935     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9936       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9937     else
9938       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9939 
9940     if (CGM.getLangOpts().OpenMPIsDevice) {
9941       VarName = Addr->getName();
9942       Addr = nullptr;
9943     } else {
9944       VarName = getAddrOfDeclareTargetVar(VD).getName();
9945       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9946     }
9947     VarSize = CGM.getPointerSize();
9948     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9949   }
9950 
9951   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9952       VarName, Addr, VarSize, Flags, Linkage);
9953 }
9954 
9955 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9956   if (isa<FunctionDecl>(GD.getDecl()) ||
9957       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9958     return emitTargetFunctions(GD);
9959 
9960   return emitTargetGlobalVariable(GD);
9961 }
9962 
9963 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9964   for (const VarDecl *VD : DeferredGlobalVariables) {
9965     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9966         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9967     if (!Res)
9968       continue;
9969     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9970         !HasRequiresUnifiedSharedMemory) {
9971       CGM.EmitGlobal(VD);
9972     } else {
9973       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9974               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9975                HasRequiresUnifiedSharedMemory)) &&
9976              "Expected link clause or to clause with unified memory.");
9977       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9978     }
9979   }
9980 }
9981 
/// Hook for adjusting lambda-related data of the target directive \p D.
///
/// This implementation emits no code: it only checks (in asserts-enabled
/// builds) that \p D is a target execution directive.
/// NOTE(review): presumably overridden by device-specific runtimes - confirm.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9987 
9988 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9989   for (const OMPClause *Clause : D->clauselists()) {
9990     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9991       HasRequiresUnifiedSharedMemory = true;
9992     } else if (const auto *AC =
9993                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9994       switch (AC->getAtomicDefaultMemOrderKind()) {
9995       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9996         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9997         break;
9998       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9999         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10000         break;
10001       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10002         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10003         break;
10004       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10005         break;
10006       }
10007     }
10008   }
10009 }
10010 
/// Returns the atomic ordering currently stored in RequiresAtomicOrdering,
/// i.e. the value last recorded by processRequiresDirective() for an
/// 'atomic_default_mem_order' clause, if any was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10014 
10015 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10016                                                        LangAS &AS) {
10017   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10018     return false;
10019   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10020   switch(A->getAllocatorType()) {
10021   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10022   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10023   // Not supported, fallback to the default mem space.
10024   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10025   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10026   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10027   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10028   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10029   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10030   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10031     AS = LangAS::Default;
10032     return true;
10033   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10034     llvm_unreachable("Expected predefined allocator for the variables with the "
10035                      "static storage.");
10036   }
10037   return false;
10038 }
10039 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it encounters a
/// 'unified_shared_memory' clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10043 
10044 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10045     CodeGenModule &CGM)
10046     : CGM(CGM) {
10047   if (CGM.getLangOpts().OpenMPIsDevice) {
10048     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10049     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10050   }
10051 }
10052 
10053 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10054   if (CGM.getLangOpts().OpenMPIsDevice)
10055     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10056 }
10057 
10058 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10059   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10060     return true;
10061 
10062   const auto *D = cast<FunctionDecl>(GD.getDecl());
10063   // Do not to emit function if it is marked as declare target as it was already
10064   // emitted.
10065   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10066     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10067       if (auto *F = dyn_cast_or_null<llvm::Function>(
10068               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10069         return !F->isDeclaration();
10070       return false;
10071     }
10072     return true;
10073   }
10074 
10075   return !AlreadyEmittedTargetDecls.insert(D).second;
10076 }
10077 
10078 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10079   // If we don't have entries or if we are emitting code for the device, we
10080   // don't need to do anything.
10081   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10082       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10083       (OffloadEntriesInfoManager.empty() &&
10084        !HasEmittedDeclareTargetRegion &&
10085        !HasEmittedTargetRegion))
10086     return nullptr;
10087 
10088   // Create and register the function that handles the requires directives.
10089   ASTContext &C = CGM.getContext();
10090 
10091   llvm::Function *RequiresRegFn;
10092   {
10093     CodeGenFunction CGF(CGM);
10094     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10095     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10096     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10097     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10098     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10099     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10100     // TODO: check for other requires clauses.
10101     // The requires directive takes effect only when a target region is
10102     // present in the compilation unit. Otherwise it is ignored and not
10103     // passed to the runtime. This avoids the runtime from throwing an error
10104     // for mismatching requires clauses across compilation units that don't
10105     // contain at least 1 target region.
10106     assert((HasEmittedTargetRegion ||
10107             HasEmittedDeclareTargetRegion ||
10108             !OffloadEntriesInfoManager.empty()) &&
10109            "Target or declare target region expected.");
10110     if (HasRequiresUnifiedSharedMemory)
10111       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10112     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10113                             CGM.getModule(), OMPRTL___tgt_register_requires),
10114                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10115     CGF.FinishFunction();
10116   }
10117   return RequiresRegFn;
10118 }
10119 
10120 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10121                                     const OMPExecutableDirective &D,
10122                                     SourceLocation Loc,
10123                                     llvm::Function *OutlinedFn,
10124                                     ArrayRef<llvm::Value *> CapturedVars) {
10125   if (!CGF.HaveInsertPoint())
10126     return;
10127 
10128   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10129   CodeGenFunction::RunCleanupsScope Scope(CGF);
10130 
10131   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10132   llvm::Value *Args[] = {
10133       RTLoc,
10134       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10135       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10136   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10137   RealArgs.append(std::begin(Args), std::end(Args));
10138   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10139 
10140   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10141       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10142   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10143 }
10144 
10145 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10146                                          const Expr *NumTeams,
10147                                          const Expr *ThreadLimit,
10148                                          SourceLocation Loc) {
10149   if (!CGF.HaveInsertPoint())
10150     return;
10151 
10152   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10153 
10154   llvm::Value *NumTeamsVal =
10155       NumTeams
10156           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10157                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10158           : CGF.Builder.getInt32(0);
10159 
10160   llvm::Value *ThreadLimitVal =
10161       ThreadLimit
10162           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10163                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10164           : CGF.Builder.getInt32(0);
10165 
10166   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10167   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10168                                      ThreadLimitVal};
10169   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10170                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10171                       PushNumTeamsArgs);
10172 }
10173 
/// Emits the code for a 'target data' region: the runtime call opening the
/// data environment (__tgt_target_data_begin_mapper), the region body
/// produced by \p CodeGen, and the runtime call closing the environment
/// (__tgt_target_data_end_mapper).
///
/// \param D directive whose map information is generated.
/// \param IfCond condition of the 'if' clause, or null. When present, the
/// begin/end calls are emitted through emitIfClause().
/// \param Device expression of the 'device' clause, or null, in which case
/// OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen generator of the region body.
/// \param Info offloading arrays information; filled while opening the region
/// and reused when closing it.
///
/// Where the body is emitted depends on Info.CaptureDeviceAddrMap: if it is
/// non-empty the body is emitted right after the begin call and again,
/// with the NoPrivAction action set, in the 'else' generator; otherwise it is
/// emitted once between the begin and end sequences.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/false);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/true);

    // Emit device ID if any. The device expression is evaluated again here,
    // independently of the evaluation done for the begin call.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally if there is an 'if' clause.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, under the same condition as the opening.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10305 
10306 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10307     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10308     const Expr *Device) {
10309   if (!CGF.HaveInsertPoint())
10310     return;
10311 
10312   assert((isa<OMPTargetEnterDataDirective>(D) ||
10313           isa<OMPTargetExitDataDirective>(D) ||
10314           isa<OMPTargetUpdateDirective>(D)) &&
10315          "Expecting either target enter, exit data, or update directives.");
10316 
10317   CodeGenFunction::OMPTargetDataInfo InputInfo;
10318   llvm::Value *MapTypesArray = nullptr;
10319   // Generate the code for the opening of the data environment.
10320   auto &&ThenGen = [this, &D, Device, &InputInfo,
10321                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10322     // Emit device ID if any.
10323     llvm::Value *DeviceID = nullptr;
10324     if (Device) {
10325       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10326                                            CGF.Int64Ty, /*isSigned=*/true);
10327     } else {
10328       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10329     }
10330 
10331     // Emit the number of elements in the offloading arrays.
10332     llvm::Constant *PointerNum =
10333         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10334 
10335     llvm::Value *OffloadingArgs[] = {DeviceID,
10336                                      PointerNum,
10337                                      InputInfo.BasePointersArray.getPointer(),
10338                                      InputInfo.PointersArray.getPointer(),
10339                                      InputInfo.SizesArray.getPointer(),
10340                                      MapTypesArray,
10341                                      InputInfo.MappersArray.getPointer()};
10342 
10343     // Select the right runtime function call for each standalone
10344     // directive.
10345     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10346     RuntimeFunction RTLFn;
10347     switch (D.getDirectiveKind()) {
10348     case OMPD_target_enter_data:
10349       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10350                         : OMPRTL___tgt_target_data_begin_mapper;
10351       break;
10352     case OMPD_target_exit_data:
10353       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10354                         : OMPRTL___tgt_target_data_end_mapper;
10355       break;
10356     case OMPD_target_update:
10357       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10358                         : OMPRTL___tgt_target_data_update_mapper;
10359       break;
10360     case OMPD_parallel:
10361     case OMPD_for:
10362     case OMPD_parallel_for:
10363     case OMPD_parallel_master:
10364     case OMPD_parallel_sections:
10365     case OMPD_for_simd:
10366     case OMPD_parallel_for_simd:
10367     case OMPD_cancel:
10368     case OMPD_cancellation_point:
10369     case OMPD_ordered:
10370     case OMPD_threadprivate:
10371     case OMPD_allocate:
10372     case OMPD_task:
10373     case OMPD_simd:
10374     case OMPD_sections:
10375     case OMPD_section:
10376     case OMPD_single:
10377     case OMPD_master:
10378     case OMPD_critical:
10379     case OMPD_taskyield:
10380     case OMPD_barrier:
10381     case OMPD_taskwait:
10382     case OMPD_taskgroup:
10383     case OMPD_atomic:
10384     case OMPD_flush:
10385     case OMPD_depobj:
10386     case OMPD_scan:
10387     case OMPD_teams:
10388     case OMPD_target_data:
10389     case OMPD_distribute:
10390     case OMPD_distribute_simd:
10391     case OMPD_distribute_parallel_for:
10392     case OMPD_distribute_parallel_for_simd:
10393     case OMPD_teams_distribute:
10394     case OMPD_teams_distribute_simd:
10395     case OMPD_teams_distribute_parallel_for:
10396     case OMPD_teams_distribute_parallel_for_simd:
10397     case OMPD_declare_simd:
10398     case OMPD_declare_variant:
10399     case OMPD_begin_declare_variant:
10400     case OMPD_end_declare_variant:
10401     case OMPD_declare_target:
10402     case OMPD_end_declare_target:
10403     case OMPD_declare_reduction:
10404     case OMPD_declare_mapper:
10405     case OMPD_taskloop:
10406     case OMPD_taskloop_simd:
10407     case OMPD_master_taskloop:
10408     case OMPD_master_taskloop_simd:
10409     case OMPD_parallel_master_taskloop:
10410     case OMPD_parallel_master_taskloop_simd:
10411     case OMPD_target:
10412     case OMPD_target_simd:
10413     case OMPD_target_teams_distribute:
10414     case OMPD_target_teams_distribute_simd:
10415     case OMPD_target_teams_distribute_parallel_for:
10416     case OMPD_target_teams_distribute_parallel_for_simd:
10417     case OMPD_target_teams:
10418     case OMPD_target_parallel:
10419     case OMPD_target_parallel_for:
10420     case OMPD_target_parallel_for_simd:
10421     case OMPD_requires:
10422     case OMPD_unknown:
10423     default:
10424       llvm_unreachable("Unexpected standalone target data directive.");
10425       break;
10426     }
10427     CGF.EmitRuntimeCall(
10428         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10429         OffloadingArgs);
10430   };
10431 
10432   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10433                              CodeGenFunction &CGF, PrePostActionTy &) {
10434     // Fill up the arrays with all the mapped variables.
10435     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10436 
10437     // Get map clause information.
10438     MappableExprsHandler MEHandler(D, CGF);
10439     MEHandler.generateAllInfo(CombinedInfo);
10440 
10441     TargetDataInfo Info;
10442     // Fill up the arrays and create the arguments.
10443     emitOffloadingArrays(CGF, CombinedInfo, Info);
10444     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10445                                  Info.PointersArray, Info.SizesArray,
10446                                  Info.MapTypesArray, Info.MappersArray, Info);
10447     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10448     InputInfo.BasePointersArray =
10449         Address(Info.BasePointersArray, CGM.getPointerAlign());
10450     InputInfo.PointersArray =
10451         Address(Info.PointersArray, CGM.getPointerAlign());
10452     InputInfo.SizesArray =
10453         Address(Info.SizesArray, CGM.getPointerAlign());
10454     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10455     MapTypesArray = Info.MapTypesArray;
10456     if (D.hasClausesOfKind<OMPDependClause>())
10457       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10458     else
10459       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10460   };
10461 
10462   if (IfCond) {
10463     emitIfClause(CGF, IfCond, TargetThenGen,
10464                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10465   } else {
10466     RegionCodeGenTy ThenRCG(TargetThenGen);
10467     ThenRCG(CGF);
10468   }
10469 }
10470 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; a parameter is Vector unless stated
    /// otherwise.
    ParamKindTy Kind = Vector;
    /// Value mangled after the 's' (LinearWithVarStride) or 'l' (Linear)
    /// marker in the x86 vector variant name; for Linear it is omitted when
    /// equal to 1.
    /// NOTE(review): presumably the constant stride, or the position of the
    /// argument holding the stride - confirm against the code that fills it.
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; mangled as 'a<value>' in the x86 vector
    /// variant name when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
10481 
10482 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10483                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10484   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10485   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10486   // of that clause. The VLEN value must be power of 2.
10487   // In other case the notion of the function`s "characteristic data type" (CDT)
10488   // is used to compute the vector length.
10489   // CDT is defined in the following order:
10490   //   a) For non-void function, the CDT is the return type.
10491   //   b) If the function has any non-uniform, non-linear parameters, then the
10492   //   CDT is the type of the first such parameter.
10493   //   c) If the CDT determined by a) or b) above is struct, union, or class
10494   //   type which is pass-by-value (except for the type that maps to the
10495   //   built-in complex data type), the characteristic data type is int.
10496   //   d) If none of the above three cases is applicable, the CDT is int.
10497   // The VLEN is then determined based on the CDT and the size of vector
10498   // register of that ISA for which current vector version is generated. The
10499   // VLEN is computed using the formula below:
10500   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10501   // where vector register size specified in section 3.2.1 Registers and the
10502   // Stack Frame of original AMD64 ABI document.
10503   QualType RetType = FD->getReturnType();
10504   if (RetType.isNull())
10505     return 0;
10506   ASTContext &C = FD->getASTContext();
10507   QualType CDT;
10508   if (!RetType.isNull() && !RetType->isVoidType()) {
10509     CDT = RetType;
10510   } else {
10511     unsigned Offset = 0;
10512     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10513       if (ParamAttrs[Offset].Kind == Vector)
10514         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10515       ++Offset;
10516     }
10517     if (CDT.isNull()) {
10518       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10519         if (ParamAttrs[I + Offset].Kind == Vector) {
10520           CDT = FD->getParamDecl(I)->getType();
10521           break;
10522         }
10523       }
10524     }
10525   }
10526   if (CDT.isNull())
10527     CDT = C.IntTy;
10528   CDT = CDT->getCanonicalTypeUnqualified();
10529   if (CDT->isRecordType() || CDT->isUnionType())
10530     CDT = C.IntTy;
10531   return C.getTypeSize(CDT);
10532 }
10533 
10534 static void
10535 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10536                            const llvm::APSInt &VLENVal,
10537                            ArrayRef<ParamAttrTy> ParamAttrs,
10538                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10539   struct ISADataTy {
10540     char ISA;
10541     unsigned VecRegSize;
10542   };
10543   ISADataTy ISAData[] = {
10544       {
10545           'b', 128
10546       }, // SSE
10547       {
10548           'c', 256
10549       }, // AVX
10550       {
10551           'd', 256
10552       }, // AVX2
10553       {
10554           'e', 512
10555       }, // AVX512
10556   };
10557   llvm::SmallVector<char, 2> Masked;
10558   switch (State) {
10559   case OMPDeclareSimdDeclAttr::BS_Undefined:
10560     Masked.push_back('N');
10561     Masked.push_back('M');
10562     break;
10563   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10564     Masked.push_back('N');
10565     break;
10566   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10567     Masked.push_back('M');
10568     break;
10569   }
10570   for (char Mask : Masked) {
10571     for (const ISADataTy &Data : ISAData) {
10572       SmallString<256> Buffer;
10573       llvm::raw_svector_ostream Out(Buffer);
10574       Out << "_ZGV" << Data.ISA << Mask;
10575       if (!VLENVal) {
10576         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10577         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10578         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10579       } else {
10580         Out << VLENVal;
10581       }
10582       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10583         switch (ParamAttr.Kind){
10584         case LinearWithVarStride:
10585           Out << 's' << ParamAttr.StrideOrArg;
10586           break;
10587         case Linear:
10588           Out << 'l';
10589           if (ParamAttr.StrideOrArg != 1)
10590             Out << ParamAttr.StrideOrArg;
10591           break;
10592         case Uniform:
10593           Out << 'u';
10594           break;
10595         case Vector:
10596           Out << 'v';
10597           break;
10598         }
10599         if (!!ParamAttr.Alignment)
10600           Out << 'a' << ParamAttr.Alignment;
10601       }
10602       Out << '_' << Fn->getName();
10603       Fn->addFnAttr(Out.str());
10604     }
10605   }
10606 }
10607 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10613 
10614 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10615 ///
10616 /// TODO: Need to implement the behavior for reference marked with a
10617 /// var or no linear modifiers (1.b in the section). For this, we
10618 /// need to extend ParamKindTy to support the linear modifiers.
10619 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10620   QT = QT.getCanonicalType();
10621 
10622   if (QT->isVoidType())
10623     return false;
10624 
10625   if (Kind == ParamKindTy::Uniform)
10626     return false;
10627 
10628   if (Kind == ParamKindTy::Linear)
10629     return false;
10630 
10631   // TODO: Handle linear references with modifiers
10632 
10633   if (Kind == ParamKindTy::LinearWithVarStride)
10634     return false;
10635 
10636   return true;
10637 }
10638 
10639 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10640 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10641   QT = QT.getCanonicalType();
10642   unsigned Size = C.getTypeSize(QT);
10643 
10644   // Only scalars and complex within 16 bytes wide set PVB to true.
10645   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10646     return false;
10647 
10648   if (QT->isFloatingType())
10649     return true;
10650 
10651   if (QT->isIntegerType())
10652     return true;
10653 
10654   if (QT->isPointerType())
10655     return true;
10656 
10657   // TODO: Add support for complex types (section 3.1.2, item 2).
10658 
10659   return false;
10660 }
10661 
10662 /// Computes the lane size (LS) of a return type or of an input parameter,
10663 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10664 /// TODO: Add support for references, section 3.2.1, item 1.
10665 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10666   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10667     QualType PTy = QT.getCanonicalType()->getPointeeType();
10668     if (getAArch64PBV(PTy, C))
10669       return C.getTypeSize(PTy);
10670   }
10671   if (getAArch64PBV(QT, C))
10672     return C.getTypeSize(QT);
10673 
10674   return C.getTypeSize(C.getUIntPtrType());
10675 }
10676 
10677 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10678 // signature of the scalar function, as defined in 3.2.2 of the
10679 // AAVFABI.
10680 static std::tuple<unsigned, unsigned, bool>
10681 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10682   QualType RetType = FD->getReturnType().getCanonicalType();
10683 
10684   ASTContext &C = FD->getASTContext();
10685 
10686   bool OutputBecomesInput = false;
10687 
10688   llvm::SmallVector<unsigned, 8> Sizes;
10689   if (!RetType->isVoidType()) {
10690     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10691     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10692       OutputBecomesInput = true;
10693   }
10694   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10695     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10696     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10697   }
10698 
10699   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10700   // The LS of a function parameter / return value can only be a power
10701   // of 2, starting from 8 bits, up to 128.
10702   assert(std::all_of(Sizes.begin(), Sizes.end(),
10703                      [](unsigned Size) {
10704                        return Size == 8 || Size == 16 || Size == 32 ||
10705                               Size == 64 || Size == 128;
10706                      }) &&
10707          "Invalid size");
10708 
10709   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10710                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10711                          OutputBecomesInput);
10712 }
10713 
10714 /// Mangle the parameter part of the vector function name according to
10715 /// their OpenMP classification. The mangling function is defined in
10716 /// section 3.5 of the AAVFABI.
10717 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10718   SmallString<256> Buffer;
10719   llvm::raw_svector_ostream Out(Buffer);
10720   for (const auto &ParamAttr : ParamAttrs) {
10721     switch (ParamAttr.Kind) {
10722     case LinearWithVarStride:
10723       Out << "ls" << ParamAttr.StrideOrArg;
10724       break;
10725     case Linear:
10726       Out << 'l';
10727       // Don't print the step value if it is not present or if it is
10728       // equal to 1.
10729       if (ParamAttr.StrideOrArg != 1)
10730         Out << ParamAttr.StrideOrArg;
10731       break;
10732     case Uniform:
10733       Out << 'u';
10734       break;
10735     case Vector:
10736       Out << 'v';
10737       break;
10738     }
10739 
10740     if (!!ParamAttr.Alignment)
10741       Out << 'a' << ParamAttr.Alignment;
10742   }
10743 
10744   return std::string(Out.str());
10745 }
10746 
10747 // Function used to add the attribute. The parameter `VLEN` is
10748 // templated to allow the use of "x" when targeting scalable functions
10749 // for SVE.
10750 template <typename T>
10751 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10752                                  char ISA, StringRef ParSeq,
10753                                  StringRef MangledName, bool OutputBecomesInput,
10754                                  llvm::Function *Fn) {
10755   SmallString<256> Buffer;
10756   llvm::raw_svector_ostream Out(Buffer);
10757   Out << Prefix << ISA << LMask << VLEN;
10758   if (OutputBecomesInput)
10759     Out << "v";
10760   Out << ParSeq << "_" << MangledName;
10761   Fn->addFnAttr(Out.str());
10762 }
10763 
10764 // Helper function to generate the Advanced SIMD names depending on
10765 // the value of the NDS when simdlen is not present.
10766 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10767                                       StringRef Prefix, char ISA,
10768                                       StringRef ParSeq, StringRef MangledName,
10769                                       bool OutputBecomesInput,
10770                                       llvm::Function *Fn) {
10771   switch (NDS) {
10772   case 8:
10773     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10774                          OutputBecomesInput, Fn);
10775     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10776                          OutputBecomesInput, Fn);
10777     break;
10778   case 16:
10779     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10780                          OutputBecomesInput, Fn);
10781     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10782                          OutputBecomesInput, Fn);
10783     break;
10784   case 32:
10785     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10786                          OutputBecomesInput, Fn);
10787     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10788                          OutputBecomesInput, Fn);
10789     break;
10790   case 64:
10791   case 128:
10792     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10793                          OutputBecomesInput, Fn);
10794     break;
10795   default:
10796     llvm_unreachable("Scalar type is too wide.");
10797   }
10798 }
10799 
10800 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10801 static void emitAArch64DeclareSimdFunction(
10802     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10803     ArrayRef<ParamAttrTy> ParamAttrs,
10804     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10805     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10806 
10807   // Get basic data for building the vector signature.
10808   const auto Data = getNDSWDS(FD, ParamAttrs);
10809   const unsigned NDS = std::get<0>(Data);
10810   const unsigned WDS = std::get<1>(Data);
10811   const bool OutputBecomesInput = std::get<2>(Data);
10812 
10813   // Check the values provided via `simdlen` by the user.
10814   // 1. A `simdlen(1)` doesn't produce vector signatures,
10815   if (UserVLEN == 1) {
10816     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10817         DiagnosticsEngine::Warning,
10818         "The clause simdlen(1) has no effect when targeting aarch64.");
10819     CGM.getDiags().Report(SLoc, DiagID);
10820     return;
10821   }
10822 
10823   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10824   // Advanced SIMD output.
10825   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10826     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10827         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10828                                     "power of 2 when targeting Advanced SIMD.");
10829     CGM.getDiags().Report(SLoc, DiagID);
10830     return;
10831   }
10832 
10833   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10834   // limits.
10835   if (ISA == 's' && UserVLEN != 0) {
10836     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10837       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10838           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10839                                       "lanes in the architectural constraints "
10840                                       "for SVE (min is 128-bit, max is "
10841                                       "2048-bit, by steps of 128-bit)");
10842       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10843       return;
10844     }
10845   }
10846 
10847   // Sort out parameter sequence.
10848   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10849   StringRef Prefix = "_ZGV";
10850   // Generate simdlen from user input (if any).
10851   if (UserVLEN) {
10852     if (ISA == 's') {
10853       // SVE generates only a masked function.
10854       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10855                            OutputBecomesInput, Fn);
10856     } else {
10857       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10858       // Advanced SIMD generates one or two functions, depending on
10859       // the `[not]inbranch` clause.
10860       switch (State) {
10861       case OMPDeclareSimdDeclAttr::BS_Undefined:
10862         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10863                              OutputBecomesInput, Fn);
10864         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10865                              OutputBecomesInput, Fn);
10866         break;
10867       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10868         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10869                              OutputBecomesInput, Fn);
10870         break;
10871       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10872         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10873                              OutputBecomesInput, Fn);
10874         break;
10875       }
10876     }
10877   } else {
10878     // If no user simdlen is provided, follow the AAVFABI rules for
10879     // generating the vector length.
10880     if (ISA == 's') {
10881       // SVE, section 3.4.1, item 1.
10882       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10883                            OutputBecomesInput, Fn);
10884     } else {
10885       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10886       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10887       // two vector names depending on the use of the clause
10888       // `[not]inbranch`.
10889       switch (State) {
10890       case OMPDeclareSimdDeclAttr::BS_Undefined:
10891         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10892                                   OutputBecomesInput, Fn);
10893         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10894                                   OutputBecomesInput, Fn);
10895         break;
10896       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10897         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10898                                   OutputBecomesInput, Fn);
10899         break;
10900       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10901         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10902                                   OutputBecomesInput, Fn);
10903         break;
10904       }
10905     }
10906   }
10907 }
10908 
10909 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10910                                               llvm::Function *Fn) {
10911   ASTContext &C = CGM.getContext();
10912   FD = FD->getMostRecentDecl();
10913   // Map params to their positions in function decl.
10914   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10915   if (isa<CXXMethodDecl>(FD))
10916     ParamPositions.try_emplace(FD, 0);
10917   unsigned ParamPos = ParamPositions.size();
10918   for (const ParmVarDecl *P : FD->parameters()) {
10919     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10920     ++ParamPos;
10921   }
10922   while (FD) {
10923     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10924       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10925       // Mark uniform parameters.
10926       for (const Expr *E : Attr->uniforms()) {
10927         E = E->IgnoreParenImpCasts();
10928         unsigned Pos;
10929         if (isa<CXXThisExpr>(E)) {
10930           Pos = ParamPositions[FD];
10931         } else {
10932           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10933                                 ->getCanonicalDecl();
10934           Pos = ParamPositions[PVD];
10935         }
10936         ParamAttrs[Pos].Kind = Uniform;
10937       }
10938       // Get alignment info.
10939       auto NI = Attr->alignments_begin();
10940       for (const Expr *E : Attr->aligneds()) {
10941         E = E->IgnoreParenImpCasts();
10942         unsigned Pos;
10943         QualType ParmTy;
10944         if (isa<CXXThisExpr>(E)) {
10945           Pos = ParamPositions[FD];
10946           ParmTy = E->getType();
10947         } else {
10948           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10949                                 ->getCanonicalDecl();
10950           Pos = ParamPositions[PVD];
10951           ParmTy = PVD->getType();
10952         }
10953         ParamAttrs[Pos].Alignment =
10954             (*NI)
10955                 ? (*NI)->EvaluateKnownConstInt(C)
10956                 : llvm::APSInt::getUnsigned(
10957                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10958                           .getQuantity());
10959         ++NI;
10960       }
10961       // Mark linear parameters.
10962       auto SI = Attr->steps_begin();
10963       auto MI = Attr->modifiers_begin();
10964       for (const Expr *E : Attr->linears()) {
10965         E = E->IgnoreParenImpCasts();
10966         unsigned Pos;
10967         // Rescaling factor needed to compute the linear parameter
10968         // value in the mangled name.
10969         unsigned PtrRescalingFactor = 1;
10970         if (isa<CXXThisExpr>(E)) {
10971           Pos = ParamPositions[FD];
10972         } else {
10973           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10974                                 ->getCanonicalDecl();
10975           Pos = ParamPositions[PVD];
10976           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10977             PtrRescalingFactor = CGM.getContext()
10978                                      .getTypeSizeInChars(P->getPointeeType())
10979                                      .getQuantity();
10980         }
10981         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10982         ParamAttr.Kind = Linear;
10983         // Assuming a stride of 1, for `linear` without modifiers.
10984         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10985         if (*SI) {
10986           Expr::EvalResult Result;
10987           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10988             if (const auto *DRE =
10989                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10990               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10991                 ParamAttr.Kind = LinearWithVarStride;
10992                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10993                     ParamPositions[StridePVD->getCanonicalDecl()]);
10994               }
10995             }
10996           } else {
10997             ParamAttr.StrideOrArg = Result.Val.getInt();
10998           }
10999         }
11000         // If we are using a linear clause on a pointer, we need to
11001         // rescale the value of linear_step with the byte size of the
11002         // pointee type.
11003         if (Linear == ParamAttr.Kind)
11004           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11005         ++SI;
11006         ++MI;
11007       }
11008       llvm::APSInt VLENVal;
11009       SourceLocation ExprLoc;
11010       const Expr *VLENExpr = Attr->getSimdlen();
11011       if (VLENExpr) {
11012         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11013         ExprLoc = VLENExpr->getExprLoc();
11014       }
11015       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11016       if (CGM.getTriple().isX86()) {
11017         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11018       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11019         unsigned VLEN = VLENVal.getExtValue();
11020         StringRef MangledName = Fn->getName();
11021         if (CGM.getTarget().hasFeature("sve"))
11022           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11023                                          MangledName, 's', 128, Fn, ExprLoc);
11024         if (CGM.getTarget().hasFeature("neon"))
11025           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11026                                          MangledName, 'n', 128, Fn, ExprLoc);
11027       }
11028     }
11029     FD = FD->getPreviousDecl();
11030   }
11031 }
11032 
11033 namespace {
11034 /// Cleanup action for doacross support.
11035 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11036 public:
11037   static const int DoacrossFinArgs = 2;
11038 
11039 private:
11040   llvm::FunctionCallee RTLFn;
11041   llvm::Value *Args[DoacrossFinArgs];
11042 
11043 public:
11044   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11045                     ArrayRef<llvm::Value *> CallArgs)
11046       : RTLFn(RTLFn) {
11047     assert(CallArgs.size() == DoacrossFinArgs);
11048     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11049   }
11050   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11051     if (!CGF.HaveInsertPoint())
11052       return;
11053     CGF.EmitRuntimeCall(RTLFn, Args);
11054   }
11055 };
11056 } // namespace
11057 
11058 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11059                                        const OMPLoopDirective &D,
11060                                        ArrayRef<Expr *> NumIterations) {
11061   if (!CGF.HaveInsertPoint())
11062     return;
11063 
11064   ASTContext &C = CGM.getContext();
11065   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11066   RecordDecl *RD;
11067   if (KmpDimTy.isNull()) {
11068     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11069     //  kmp_int64 lo; // lower
11070     //  kmp_int64 up; // upper
11071     //  kmp_int64 st; // stride
11072     // };
11073     RD = C.buildImplicitRecord("kmp_dim");
11074     RD->startDefinition();
11075     addFieldToRecordDecl(C, RD, Int64Ty);
11076     addFieldToRecordDecl(C, RD, Int64Ty);
11077     addFieldToRecordDecl(C, RD, Int64Ty);
11078     RD->completeDefinition();
11079     KmpDimTy = C.getRecordType(RD);
11080   } else {
11081     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11082   }
11083   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11084   QualType ArrayTy =
11085       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11086 
11087   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11088   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11089   enum { LowerFD = 0, UpperFD, StrideFD };
11090   // Fill dims with data.
11091   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11092     LValue DimsLVal = CGF.MakeAddrLValue(
11093         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11094     // dims.upper = num_iterations;
11095     LValue UpperLVal = CGF.EmitLValueForField(
11096         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11097     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11098         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11099         Int64Ty, NumIterations[I]->getExprLoc());
11100     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11101     // dims.stride = 1;
11102     LValue StrideLVal = CGF.EmitLValueForField(
11103         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11104     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11105                           StrideLVal);
11106   }
11107 
11108   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11109   // kmp_int32 num_dims, struct kmp_dim * dims);
11110   llvm::Value *Args[] = {
11111       emitUpdateLocation(CGF, D.getBeginLoc()),
11112       getThreadID(CGF, D.getBeginLoc()),
11113       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11114       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11115           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11116           CGM.VoidPtrTy)};
11117 
11118   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11119       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11120   CGF.EmitRuntimeCall(RTLFn, Args);
11121   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11122       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11123   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11124       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11125   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11126                                              llvm::makeArrayRef(FiniArgs));
11127 }
11128 
11129 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11130                                           const OMPDependClause *C) {
11131   QualType Int64Ty =
11132       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11133   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11134   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11135       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11136   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11137   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11138     const Expr *CounterVal = C->getLoopData(I);
11139     assert(CounterVal);
11140     llvm::Value *CntVal = CGF.EmitScalarConversion(
11141         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11142         CounterVal->getExprLoc());
11143     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11144                           /*Volatile=*/false, Int64Ty);
11145   }
11146   llvm::Value *Args[] = {
11147       emitUpdateLocation(CGF, C->getBeginLoc()),
11148       getThreadID(CGF, C->getBeginLoc()),
11149       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11150   llvm::FunctionCallee RTLFn;
11151   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11152     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11153                                                   OMPRTL___kmpc_doacross_post);
11154   } else {
11155     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11156     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11157                                                   OMPRTL___kmpc_doacross_wait);
11158   }
11159   CGF.EmitRuntimeCall(RTLFn, Args);
11160 }
11161 
11162 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11163                                llvm::FunctionCallee Callee,
11164                                ArrayRef<llvm::Value *> Args) const {
11165   assert(Loc.isValid() && "Outlined function call location must be valid.");
11166   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11167 
11168   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11169     if (Fn->doesNotThrow()) {
11170       CGF.EmitNounwindRuntimeCall(Fn, Args);
11171       return;
11172     }
11173   }
11174   CGF.EmitRuntimeCall(Callee, Args);
11175 }
11176 
/// Emits a call to the outlined function \p OutlinedFn with \p Args at
/// \p Loc. This implementation forwards all its arguments to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11182 
11183 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11184   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11185     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11186       HasEmittedDeclareTargetRegion = true;
11187 }
11188 
/// Returns the address of the parameter \p NativeParam in \p CGF.
/// This implementation looks up the local variable address of
/// \p NativeParam and does not read \p TargetParam.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11194 
11195 namespace {
11196 /// Cleanup action for allocate support.
11197 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11198 public:
11199   static const int CleanupArgs = 3;
11200 
11201 private:
11202   llvm::FunctionCallee RTLFn;
11203   llvm::Value *Args[CleanupArgs];
11204 
11205 public:
11206   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11207                        ArrayRef<llvm::Value *> CallArgs)
11208       : RTLFn(RTLFn) {
11209     assert(CallArgs.size() == CleanupArgs &&
11210            "Size of arguments does not match.");
11211     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11212   }
11213   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11214     if (!CGF.HaveInsertPoint())
11215       return;
11216     CGF.EmitRuntimeCall(RTLFn, Args);
11217   }
11218 };
11219 } // namespace
11220 
11221 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11222                                                    const VarDecl *VD) {
11223   if (!VD)
11224     return Address::invalid();
11225   const VarDecl *CVD = VD->getCanonicalDecl();
11226   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11227     return Address::invalid();
11228   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11229   // Use the default allocation.
11230   if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
11231        AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
11232       !AA->getAllocator())
11233     return Address::invalid();
11234   llvm::Value *Size;
11235   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11236   if (CVD->getType()->isVariablyModifiedType()) {
11237     Size = CGF.getTypeSize(CVD->getType());
11238     // Align the size: ((size + align - 1) / align) * align
11239     Size = CGF.Builder.CreateNUWAdd(
11240         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11241     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11242     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11243   } else {
11244     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11245     Size = CGM.getSize(Sz.alignTo(Align));
11246   }
11247   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11248   assert(AA->getAllocator() &&
11249          "Expected allocator expression for non-default allocator.");
11250   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11251   // According to the standard, the original allocator type is a enum (integer).
11252   // Convert to pointer type, if required.
11253   if (Allocator->getType()->isIntegerTy())
11254     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11255   else if (Allocator->getType()->isPointerTy())
11256     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11257                                                                 CGM.VoidPtrTy);
11258   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11259 
11260   llvm::Value *Addr =
11261       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11262                               CGM.getModule(), OMPRTL___kmpc_alloc),
11263                           Args, getName({CVD->getName(), ".void.addr"}));
11264   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11265                                                               Allocator};
11266   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11267       CGM.getModule(), OMPRTL___kmpc_free);
11268 
11269   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11270                                                 llvm::makeArrayRef(FiniArgs));
11271   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11272       Addr,
11273       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11274       getName({CVD->getName(), ".addr"}));
11275   return Address(Addr, Align);
11276 }
11277 
11278 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11279     CodeGenModule &CGM, const OMPLoopDirective &S)
11280     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11281   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11282   if (!NeedToPush)
11283     return;
11284   NontemporalDeclsSet &DS =
11285       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11286   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11287     for (const Stmt *Ref : C->private_refs()) {
11288       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11289       const ValueDecl *VD;
11290       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11291         VD = DRE->getDecl();
11292       } else {
11293         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11294         assert((ME->isImplicitCXXThis() ||
11295                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11296                "Expected member of current class.");
11297         VD = ME->getMemberDecl();
11298       }
11299       DS.insert(VD);
11300     }
11301   }
11302 }
11303 
11304 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11305   if (!NeedToPush)
11306     return;
11307   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11308 }
11309 
11310 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11311   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11312 
11313   return llvm::any_of(
11314       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11315       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11316 }
11317 
11318 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11319     const OMPExecutableDirective &S,
11320     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11321     const {
11322   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11323   // Vars in target/task regions must be excluded completely.
11324   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11325       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11326     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11327     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11328     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11329     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11330       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11331         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11332     }
11333   }
11334   // Exclude vars in private clauses.
11335   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11336     for (const Expr *Ref : C->varlists()) {
11337       if (!Ref->getType()->isScalarType())
11338         continue;
11339       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11340       if (!DRE)
11341         continue;
11342       NeedToCheckForLPCs.insert(DRE->getDecl());
11343     }
11344   }
11345   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11346     for (const Expr *Ref : C->varlists()) {
11347       if (!Ref->getType()->isScalarType())
11348         continue;
11349       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11350       if (!DRE)
11351         continue;
11352       NeedToCheckForLPCs.insert(DRE->getDecl());
11353     }
11354   }
11355   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11356     for (const Expr *Ref : C->varlists()) {
11357       if (!Ref->getType()->isScalarType())
11358         continue;
11359       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11360       if (!DRE)
11361         continue;
11362       NeedToCheckForLPCs.insert(DRE->getDecl());
11363     }
11364   }
11365   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11366     for (const Expr *Ref : C->varlists()) {
11367       if (!Ref->getType()->isScalarType())
11368         continue;
11369       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11370       if (!DRE)
11371         continue;
11372       NeedToCheckForLPCs.insert(DRE->getDecl());
11373     }
11374   }
11375   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11376     for (const Expr *Ref : C->varlists()) {
11377       if (!Ref->getType()->isScalarType())
11378         continue;
11379       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11380       if (!DRE)
11381         continue;
11382       NeedToCheckForLPCs.insert(DRE->getDecl());
11383     }
11384   }
11385   for (const Decl *VD : NeedToCheckForLPCs) {
11386     for (const LastprivateConditionalData &Data :
11387          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11388       if (Data.DeclToUniqueName.count(VD) > 0) {
11389         if (!Data.Disabled)
11390           NeedToAddForLPCsAsDisabled.insert(VD);
11391         break;
11392       }
11393     }
11394   }
11395 }
11396 
11397 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11398     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11399     : CGM(CGF.CGM),
11400       Action((CGM.getLangOpts().OpenMP >= 50 &&
11401               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11402                            [](const OMPLastprivateClause *C) {
11403                              return C->getKind() ==
11404                                     OMPC_LASTPRIVATE_conditional;
11405                            }))
11406                  ? ActionToDo::PushAsLastprivateConditional
11407                  : ActionToDo::DoNotPush) {
11408   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11409   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11410     return;
11411   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11412          "Expected a push action.");
11413   LastprivateConditionalData &Data =
11414       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11415   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11416     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11417       continue;
11418 
11419     for (const Expr *Ref : C->varlists()) {
11420       Data.DeclToUniqueName.insert(std::make_pair(
11421           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11422           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11423     }
11424   }
11425   Data.IVLVal = IVLVal;
11426   Data.Fn = CGF.CurFn;
11427 }
11428 
11429 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11430     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11431     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11432   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11433   if (CGM.getLangOpts().OpenMP < 50)
11434     return;
11435   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11436   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11437   if (!NeedToAddForLPCsAsDisabled.empty()) {
11438     Action = ActionToDo::DisableLastprivateConditional;
11439     LastprivateConditionalData &Data =
11440         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11441     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11442       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11443     Data.Fn = CGF.CurFn;
11444     Data.Disabled = true;
11445   }
11446 }
11447 
/// Factory helper: returns a LastprivateConditionalRAII constructed with the
/// two-argument (\p CGF, \p S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11453 
11454 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11455   if (CGM.getLangOpts().OpenMP < 50)
11456     return;
11457   if (Action == ActionToDo::DisableLastprivateConditional) {
11458     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11459            "Expected list of disabled private vars.");
11460     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11461   }
11462   if (Action == ActionToDo::PushAsLastprivateConditional) {
11463     assert(
11464         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11465         "Expected list of lastprivate conditional vars.");
11466     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11467   }
11468 }
11469 
11470 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11471                                                         const VarDecl *VD) {
11472   ASTContext &C = CGM.getContext();
11473   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11474   if (I == LastprivateConditionalToTypes.end())
11475     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11476   QualType NewType;
11477   const FieldDecl *VDField;
11478   const FieldDecl *FiredField;
11479   LValue BaseLVal;
11480   auto VI = I->getSecond().find(VD);
11481   if (VI == I->getSecond().end()) {
11482     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11483     RD->startDefinition();
11484     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11485     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11486     RD->completeDefinition();
11487     NewType = C.getRecordType(RD);
11488     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11489     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11490     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11491   } else {
11492     NewType = std::get<0>(VI->getSecond());
11493     VDField = std::get<1>(VI->getSecond());
11494     FiredField = std::get<2>(VI->getSecond());
11495     BaseLVal = std::get<3>(VI->getSecond());
11496   }
11497   LValue FiredLVal =
11498       CGF.EmitLValueForField(BaseLVal, FiredField);
11499   CGF.EmitStoreOfScalar(
11500       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11501       FiredLVal);
11502   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11503 }
11504 
11505 namespace {
11506 /// Checks if the lastprivate conditional variable is referenced in LHS.
11507 class LastprivateConditionalRefChecker final
11508     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11509   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11510   const Expr *FoundE = nullptr;
11511   const Decl *FoundD = nullptr;
11512   StringRef UniqueDeclName;
11513   LValue IVLVal;
11514   llvm::Function *FoundFn = nullptr;
11515   SourceLocation Loc;
11516 
11517 public:
11518   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11519     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11520          llvm::reverse(LPM)) {
11521       auto It = D.DeclToUniqueName.find(E->getDecl());
11522       if (It == D.DeclToUniqueName.end())
11523         continue;
11524       if (D.Disabled)
11525         return false;
11526       FoundE = E;
11527       FoundD = E->getDecl()->getCanonicalDecl();
11528       UniqueDeclName = It->second;
11529       IVLVal = D.IVLVal;
11530       FoundFn = D.Fn;
11531       break;
11532     }
11533     return FoundE == E;
11534   }
11535   bool VisitMemberExpr(const MemberExpr *E) {
11536     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11537       return false;
11538     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11539          llvm::reverse(LPM)) {
11540       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11541       if (It == D.DeclToUniqueName.end())
11542         continue;
11543       if (D.Disabled)
11544         return false;
11545       FoundE = E;
11546       FoundD = E->getMemberDecl()->getCanonicalDecl();
11547       UniqueDeclName = It->second;
11548       IVLVal = D.IVLVal;
11549       FoundFn = D.Fn;
11550       break;
11551     }
11552     return FoundE == E;
11553   }
11554   bool VisitStmt(const Stmt *S) {
11555     for (const Stmt *Child : S->children()) {
11556       if (!Child)
11557         continue;
11558       if (const auto *E = dyn_cast<Expr>(Child))
11559         if (!E->isGLValue())
11560           continue;
11561       if (Visit(Child))
11562         return true;
11563     }
11564     return false;
11565   }
11566   explicit LastprivateConditionalRefChecker(
11567       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11568       : LPM(LPM) {}
11569   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11570   getFoundData() const {
11571     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11572   }
11573 };
11574 } // namespace
11575 
11576 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11577                                                        LValue IVLVal,
11578                                                        StringRef UniqueDeclName,
11579                                                        LValue LVal,
11580                                                        SourceLocation Loc) {
11581   // Last updated loop counter for the lastprivate conditional var.
11582   // int<xx> last_iv = 0;
11583   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11584   llvm::Constant *LastIV =
11585       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
11586   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11587       IVLVal.getAlignment().getAsAlign());
11588   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11589 
11590   // Last value of the lastprivate conditional.
11591   // decltype(priv_a) last_a;
11592   llvm::Constant *Last = getOrCreateInternalVariable(
11593       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11594   cast<llvm::GlobalVariable>(Last)->setAlignment(
11595       LVal.getAlignment().getAsAlign());
11596   LValue LastLVal =
11597       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11598 
11599   // Global loop counter. Required to handle inner parallel-for regions.
11600   // iv
11601   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11602 
11603   // #pragma omp critical(a)
11604   // if (last_iv <= iv) {
11605   //   last_iv = iv;
11606   //   last_a = priv_a;
11607   // }
11608   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11609                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11610     Action.Enter(CGF);
11611     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11612     // (last_iv <= iv) ? Check if the variable is updated and store new
11613     // value in global var.
11614     llvm::Value *CmpRes;
11615     if (IVLVal.getType()->isSignedIntegerType()) {
11616       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11617     } else {
11618       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11619              "Loop iteration variable must be integer.");
11620       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11621     }
11622     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11623     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11624     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11625     // {
11626     CGF.EmitBlock(ThenBB);
11627 
11628     //   last_iv = iv;
11629     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11630 
11631     //   last_a = priv_a;
11632     switch (CGF.getEvaluationKind(LVal.getType())) {
11633     case TEK_Scalar: {
11634       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11635       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11636       break;
11637     }
11638     case TEK_Complex: {
11639       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11640       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11641       break;
11642     }
11643     case TEK_Aggregate:
11644       llvm_unreachable(
11645           "Aggregates are not supported in lastprivate conditional.");
11646     }
11647     // }
11648     CGF.EmitBranch(ExitBB);
11649     // There is no need to emit line number for unconditional branch.
11650     (void)ApplyDebugLocation::CreateEmpty(CGF);
11651     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11652   };
11653 
11654   if (CGM.getLangOpts().OpenMPSimd) {
11655     // Do not emit as a critical region as no parallel region could be emitted.
11656     RegionCodeGenTy ThenRCG(CodeGen);
11657     ThenRCG(CGF);
11658   } else {
11659     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11660   }
11661 }
11662 
11663 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11664                                                          const Expr *LHS) {
11665   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11666     return;
11667   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11668   if (!Checker.Visit(LHS))
11669     return;
11670   const Expr *FoundE;
11671   const Decl *FoundD;
11672   StringRef UniqueDeclName;
11673   LValue IVLVal;
11674   llvm::Function *FoundFn;
11675   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11676       Checker.getFoundData();
11677   if (FoundFn != CGF.CurFn) {
11678     // Special codegen for inner parallel regions.
11679     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11680     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11681     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11682            "Lastprivate conditional is not found in outer region.");
11683     QualType StructTy = std::get<0>(It->getSecond());
11684     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11685     LValue PrivLVal = CGF.EmitLValue(FoundE);
11686     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11687         PrivLVal.getAddress(CGF),
11688         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11689     LValue BaseLVal =
11690         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11691     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11692     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11693                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11694                         FiredLVal, llvm::AtomicOrdering::Unordered,
11695                         /*IsVolatile=*/true, /*isInit=*/false);
11696     return;
11697   }
11698 
11699   // Private address of the lastprivate conditional in the current context.
11700   // priv_a
11701   LValue LVal = CGF.EmitLValue(FoundE);
11702   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11703                                    FoundE->getExprLoc());
11704 }
11705 
11706 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11707     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11708     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11709   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11710     return;
11711   auto Range = llvm::reverse(LastprivateConditionalStack);
11712   auto It = llvm::find_if(
11713       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11714   if (It == Range.end() || It->Fn != CGF.CurFn)
11715     return;
11716   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11717   assert(LPCI != LastprivateConditionalToTypes.end() &&
11718          "Lastprivates must be registered already.");
11719   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11720   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11721   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11722   for (const auto &Pair : It->DeclToUniqueName) {
11723     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11724     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11725       continue;
11726     auto I = LPCI->getSecond().find(Pair.first);
11727     assert(I != LPCI->getSecond().end() &&
11728            "Lastprivate must be rehistered already.");
11729     // bool Cmp = priv_a.Fired != 0;
11730     LValue BaseLVal = std::get<3>(I->getSecond());
11731     LValue FiredLVal =
11732         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11733     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11734     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11735     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11736     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11737     // if (Cmp) {
11738     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11739     CGF.EmitBlock(ThenBB);
11740     Address Addr = CGF.GetAddrOfLocalVar(VD);
11741     LValue LVal;
11742     if (VD->getType()->isReferenceType())
11743       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11744                                            AlignmentSource::Decl);
11745     else
11746       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11747                                 AlignmentSource::Decl);
11748     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11749                                      D.getBeginLoc());
11750     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11751     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11752     // }
11753   }
11754 }
11755 
11756 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11757     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11758     SourceLocation Loc) {
11759   if (CGF.getLangOpts().OpenMP < 50)
11760     return;
11761   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11762   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11763          "Unknown lastprivate conditional variable.");
11764   StringRef UniqueName = It->second;
11765   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11766   // The variable was not updated in the region - exit.
11767   if (!GV)
11768     return;
11769   LValue LPLVal = CGF.MakeAddrLValue(
11770       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11771   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11772   CGF.EmitStoreOfScalar(Res, PrivLVal);
11773 }
11774 
/// CGOpenMPSIMDRuntime version of emitParallelOutlinedFunction: not supported
/// in SIMD-only mode, so any call reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11780 
/// CGOpenMPSIMDRuntime version of emitTeamsOutlinedFunction: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11786 
/// CGOpenMPSIMDRuntime version of emitTaskOutlinedFunction: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable. \p NumberOfParts is
/// never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11794 
/// CGOpenMPSIMDRuntime version of emitParallelCall: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11802 
/// CGOpenMPSIMDRuntime version of emitCriticalRegion: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11809 
/// CGOpenMPSIMDRuntime version of emitMasterRegion: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11815 
/// CGOpenMPSIMDRuntime version of emitTaskyieldCall: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11820 
/// CGOpenMPSIMDRuntime version of emitTaskgroupRegion: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11826 
/// CGOpenMPSIMDRuntime version of emitSingleRegion: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11834 
/// CGOpenMPSIMDRuntime version of emitOrderedRegion: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11841 
/// CGOpenMPSIMDRuntime version of emitBarrierCall: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11849 
/// CGOpenMPSIMDRuntime version of emitForDispatchInit: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11856 
/// CGOpenMPSIMDRuntime version of emitForStaticInit: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11862 
/// CGOpenMPSIMDRuntime version of emitDistributeStaticInit: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11868 
/// CGOpenMPSIMDRuntime version of emitForOrderedIterationEnd: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11875 
/// CGOpenMPSIMDRuntime version of emitForStaticFinish: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11881 
/// CGOpenMPSIMDRuntime version of emitForNext: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable and no value is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11889 
/// CGOpenMPSIMDRuntime version of emitNumThreadsClause: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11895 
/// CGOpenMPSIMDRuntime version of emitProcBindClause: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11901 
/// CGOpenMPSIMDRuntime version of getAddrOfThreadPrivate: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11908 
/// CGOpenMPSIMDRuntime version of emitThreadPrivateVarDefinition: not
/// supported in SIMD-only mode, so any call reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11914 
/// CGOpenMPSIMDRuntime version of getAddrOfArtificialThreadPrivate: not
/// supported in SIMD-only mode, so any call reaches llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11919 
/// CGOpenMPSIMDRuntime version of emitFlush: not supported in SIMD-only mode,
/// so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11926 
/// CGOpenMPSIMDRuntime version of emitTaskCall: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11935 
/// CGOpenMPSIMDRuntime version of emitTaskLoopCall: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11942 
/// CGOpenMPSIMDRuntime version of emitReduction. Asserts that
/// \p Options.SimpleReduction is set, then forwards every argument unchanged
/// to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11951 
/// CGOpenMPSIMDRuntime version of emitTaskReductionInit: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11957 
/// CGOpenMPSIMDRuntime version of emitTaskReductionFini: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11963 
/// CGOpenMPSIMDRuntime version of emitTaskReductionFixups: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11970 
/// CGOpenMPSIMDRuntime version of getTaskReductionItem: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11977 
/// CGOpenMPSIMDRuntime version of emitTaskwaitCall: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11982 
/// CGOpenMPSIMDRuntime version of emitCancellationPointCall: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11988 
/// CGOpenMPSIMDRuntime version of emitCancelCall: not supported in SIMD-only
/// mode, so any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11994 
/// CGOpenMPSIMDRuntime version of emitTargetOutlinedFunction: not supported in
/// SIMD-only mode, so any call reaches llvm_unreachable. The output
/// parameters \p OutlinedFn and \p OutlinedFnID are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12001 
12002 void CGOpenMPSIMDRuntime::emitTargetCall(
12003     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12004     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12005     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12006     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12007                                      const OMPLoopDirective &D)>
12008         SizeEmitter) {
12009   llvm_unreachable("Not supported in SIMD-only mode");
12010 }
12011 
12012 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12013   llvm_unreachable("Not supported in SIMD-only mode");
12014 }
12015 
12016 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12017   llvm_unreachable("Not supported in SIMD-only mode");
12018 }
12019 
12020 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12021   return false;
12022 }
12023 
12024 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12025                                         const OMPExecutableDirective &D,
12026                                         SourceLocation Loc,
12027                                         llvm::Function *OutlinedFn,
12028                                         ArrayRef<llvm::Value *> CapturedVars) {
12029   llvm_unreachable("Not supported in SIMD-only mode");
12030 }
12031 
12032 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12033                                              const Expr *NumTeams,
12034                                              const Expr *ThreadLimit,
12035                                              SourceLocation Loc) {
12036   llvm_unreachable("Not supported in SIMD-only mode");
12037 }
12038 
12039 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12040     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12041     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12042   llvm_unreachable("Not supported in SIMD-only mode");
12043 }
12044 
12045 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12046     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12047     const Expr *Device) {
12048   llvm_unreachable("Not supported in SIMD-only mode");
12049 }
12050 
12051 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12052                                            const OMPLoopDirective &D,
12053                                            ArrayRef<Expr *> NumIterations) {
12054   llvm_unreachable("Not supported in SIMD-only mode");
12055 }
12056 
12057 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12058                                               const OMPDependClause *C) {
12059   llvm_unreachable("Not supported in SIMD-only mode");
12060 }
12061 
12062 const VarDecl *
12063 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12064                                         const VarDecl *NativeParam) const {
12065   llvm_unreachable("Not supported in SIMD-only mode");
12066 }
12067 
12068 Address
12069 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12070                                          const VarDecl *NativeParam,
12071                                          const VarDecl *TargetParam) const {
12072   llvm_unreachable("Not supported in SIMD-only mode");
12073 }
12074