1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412   bool NoInheritance = false;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel,
421                           bool NoInheritance = true)
422       : CGF(CGF), NoInheritance(NoInheritance) {
423     // Start emission for the construct.
424     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
425         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
426     if (NoInheritance) {
427       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
429       CGF.LambdaThisCaptureField = nullptr;
430       BlockInfo = CGF.BlockInfo;
431       CGF.BlockInfo = nullptr;
432     }
433   }
434 
435   ~InlinedOpenMPRegionRAII() {
436     // Restore original CapturedStmtInfo only if we're done with code emission.
437     auto *OldCSI =
438         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
439     delete CGF.CapturedStmtInfo;
440     CGF.CapturedStmtInfo = OldCSI;
441     if (NoInheritance) {
442       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
443       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
444       CGF.BlockInfo = BlockInfo;
445     }
446   }
447 };
448 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Must name the largest enumerator above; update it when adding flags.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
477 
// Nested anonymous namespace holding the offloading-related enums.
namespace {
// NOTE(review): presumably enables the bitwise operators for enums marked with
// LLVM_MARK_AS_BITMASK_ENUM inside this namespace -- confirm against
// llvm/ADT/BitmaskEnum.h.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Must name the largest enumerator above; update it when adding flags.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device id values used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
503 
/// Indices of the fields of the ident structure that describes a source
/// location. All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators are numbered in the declaration order of the struct
/// members shown above.
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
544 
/// Schedule kinds for 'omp for' loops. The enumerators and their values are
/// taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Default schedule: an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
576 
577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
578 /// region.
579 class CleanupTy final : public EHScopeStack::Cleanup {
580   PrePostActionTy *Action;
581 
582 public:
583   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
584   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
585     if (!CGF.HaveInsertPoint())
586       return;
587     Action->Exit(CGF);
588   }
589 };
590 
591 } // anonymous namespace
592 
593 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
594   CodeGenFunction::RunCleanupsScope Scope(CGF);
595   if (PrePostAction) {
596     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
597     Callback(CodeGen, CGF, *PrePostAction);
598   } else {
599     PrePostActionTy Action;
600     Callback(CodeGen, CGF, Action);
601   }
602 }
603 
604 /// Check if the combiner is a call to UDR combiner and if it is so return the
605 /// UDR decl used for reduction.
606 static const OMPDeclareReductionDecl *
607 getReductionInit(const Expr *ReductionOp) {
608   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
609     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
610       if (const auto *DRE =
611               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
612         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
613           return DRD;
614   return nullptr;
615 }
616 
617 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
618                                              const OMPDeclareReductionDecl *DRD,
619                                              const Expr *InitOp,
620                                              Address Private, Address Original,
621                                              QualType Ty) {
622   if (DRD->getInitializer()) {
623     std::pair<llvm::Function *, llvm::Function *> Reduction =
624         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
625     const auto *CE = cast<CallExpr>(InitOp);
626     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
627     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
628     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
629     const auto *LHSDRE =
630         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
631     const auto *RHSDRE =
632         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
633     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
634     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
635                             [=]() { return Private; });
636     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
637                             [=]() { return Original; });
638     (void)PrivateScope.Privatize();
639     RValue Func = RValue::get(Reduction.second);
640     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
641     CGF.EmitIgnoredExpr(InitOp);
642   } else {
643     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
644     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
645     auto *GV = new llvm::GlobalVariable(
646         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
647         llvm::GlobalValue::PrivateLinkage, Init, Name);
648     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
649     RValue InitRVal;
650     switch (CGF.getEvaluationKind(Ty)) {
651     case TEK_Scalar:
652       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
653       break;
654     case TEK_Complex:
655       InitRVal =
656           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
657       break;
658     case TEK_Aggregate: {
659       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
660       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
661       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
662                            /*IsInitializer=*/false);
663       return;
664     }
665     }
666     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
667     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
668     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
669                          /*IsInitializer=*/false);
670   }
671 }
672 
673 /// Emit initialization of arrays of complex types.
674 /// \param DestAddr Address of the array.
675 /// \param Type Type of array.
676 /// \param Init Initial expression of array.
677 /// \param SrcAddr Address of the original array.
678 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
679                                  QualType Type, bool EmitDeclareReductionInit,
680                                  const Expr *Init,
681                                  const OMPDeclareReductionDecl *DRD,
682                                  Address SrcAddr = Address::invalid()) {
683   // Perform element-by-element initialization.
684   QualType ElementTy;
685 
686   // Drill down to the base element type on both arrays.
687   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
688   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
689   DestAddr =
690       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
691   if (DRD)
692     SrcAddr =
693         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
694 
695   llvm::Value *SrcBegin = nullptr;
696   if (DRD)
697     SrcBegin = SrcAddr.getPointer();
698   llvm::Value *DestBegin = DestAddr.getPointer();
699   // Cast from pointer to array type to pointer to single element.
700   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
701   // The basic structure here is a while-do loop.
702   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
704   llvm::Value *IsEmpty =
705       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707 
708   // Enter the loop body, making that address the current address.
709   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710   CGF.EmitBlock(BodyBB);
711 
712   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713 
714   llvm::PHINode *SrcElementPHI = nullptr;
715   Address SrcElementCurrent = Address::invalid();
716   if (DRD) {
717     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718                                           "omp.arraycpy.srcElementPast");
719     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720     SrcElementCurrent =
721         Address(SrcElementPHI,
722                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723   }
724   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726   DestElementPHI->addIncoming(DestBegin, EntryBB);
727   Address DestElementCurrent =
728       Address(DestElementPHI,
729               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731   // Emit copy.
732   {
733     CodeGenFunction::RunCleanupsScope InitScope(CGF);
734     if (EmitDeclareReductionInit) {
735       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736                                        SrcElementCurrent, ElementTy);
737     } else
738       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739                            /*IsInitializer=*/false);
740   }
741 
742   if (DRD) {
743     // Shift the address forward by one element.
744     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
746     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
747   }
748 
749   // Shift the address forward by one element.
750   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
751       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
752   // Check whether we've reached the end.
753   llvm::Value *Done =
754       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
755   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
756   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
757 
758   // Done.
759   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
760 }
761 
762 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
763   return CGF.EmitOMPSharedLValue(E);
764 }
765 
766 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
767                                             const Expr *E) {
768   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
769     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
770   return LValue();
771 }
772 
773 void ReductionCodeGen::emitAggregateInitialization(
774     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
775     const OMPDeclareReductionDecl *DRD) {
776   // Emit VarDecl with copy init for arrays.
777   // Get the address of the original variable captured in current
778   // captured region.
779   const auto *PrivateVD =
780       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
781   bool EmitDeclareReductionInit =
782       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
783   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
784                        EmitDeclareReductionInit,
785                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
786                                                 : PrivateVD->getInit(),
787                        DRD, SharedLVal.getAddress(CGF));
788 }
789 
790 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
791                                    ArrayRef<const Expr *> Origs,
792                                    ArrayRef<const Expr *> Privates,
793                                    ArrayRef<const Expr *> ReductionOps) {
794   ClausesData.reserve(Shareds.size());
795   SharedAddresses.reserve(Shareds.size());
796   Sizes.reserve(Shareds.size());
797   BaseDecls.reserve(Shareds.size());
798   const auto *IOrig = Origs.begin();
799   const auto *IPriv = Privates.begin();
800   const auto *IRed = ReductionOps.begin();
801   for (const Expr *Ref : Shareds) {
802     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
803     std::advance(IOrig, 1);
804     std::advance(IPriv, 1);
805     std::advance(IRed, 1);
806   }
807 }
808 
809 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
810   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
811          "Number of generated lvalues must be exactly N.");
812   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
813   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
814   SharedAddresses.emplace_back(First, Second);
815   if (ClausesData[N].Shared == ClausesData[N].Ref) {
816     OrigAddresses.emplace_back(First, Second);
817   } else {
818     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
819     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
820     OrigAddresses.emplace_back(First, Second);
821   }
822 }
823 
/// Computes the size of reduction item \p N and appends it to Sizes as a
/// (size in chars, number of elements) pair.
///
/// For a private type that is not variably modified, only the size in chars
/// is recorded and the element count is nullptr. For a variably modified
/// type, the element count is also computed, bound to the size expression of
/// the private variable-length array type, and the type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: the size comes from the (non-reference) type of the
    // original lvalue; there is no runtime element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type is taken from the LLVM pointer type of the original lvalue.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (upper bound - lower bound) + 1, and
    // the size in chars is that count times the element size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: take the total type size and divide by the element
    // size to obtain the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the opaque size expression of the private VLA type to the computed
  // element count while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
860 
861 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
862                                          llvm::Value *Size) {
863   const auto *PrivateVD =
864       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
865   QualType PrivateType = PrivateVD->getType();
866   if (!PrivateType->isVariablyModifiedType()) {
867     assert(!Size && !Sizes[N].second &&
868            "Size should be nullptr for non-variably modified reduction "
869            "items.");
870     return;
871   }
872   CodeGenFunction::OpaqueValueMapping OpaqueMap(
873       CGF,
874       cast<OpaqueValueExpr>(
875           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
876       RValue::get(Size));
877   CGF.EmitVariablyModifiedType(PrivateType);
878 }
879 
/// Emits the initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is bitcast to the
///                    memory type of the private variable before use.
/// \param SharedLVal  Lvalue of the shared item; it is rebuilt with the type,
///                    base info and TBAA info of SharedAddresses[N].first.
/// \param DefaultInit Callback invoked with \p CGF; its boolean result is
///                    only consulted on the last branch below.
///                    NOTE(review): presumably returns true when it already
///                    performed the initialization - confirm with callers.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed item: element-wise initialization. DefaultInit is only
    // invoked (result ignored) when a declare-reduction initializer exists.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item initialized through the declare-reduction initializer;
    // DefaultInit runs first and its result is ignored.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returned false: fall back to the private variable's own
    // non-trivial initializer, if it has one.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
913 
914 bool ReductionCodeGen::needCleanups(unsigned N) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   return DTorKind != QualType::DK_none;
920 }
921 
922 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
923                                     Address PrivateAddr) {
924   const auto *PrivateVD =
925       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
926   QualType PrivateType = PrivateVD->getType();
927   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
928   if (needCleanups(N)) {
929     PrivateAddr = CGF.Builder.CreateElementBitCast(
930         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
931     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
932   }
933 }
934 
/// Starting from \p BaseLV, loads through successive pointer/reference levels
/// of \p BaseTy until the type is no longer a pointer or reference, or it
/// matches \p ElTy.
///
/// \returns an lvalue whose address is the resulting address element-bitcast
/// to the memory type of \p ElTy, while keeping the type, base info and TBAA
/// info derived from the final \p BaseLV.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointer and continue with its pointee.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
954 
/// Wraps \p Addr in as many levels of indirection as \p BaseTy has pointer or
/// reference levels before reaching \p ElTy.
///
/// For every such level a memory temporary is created, and each temporary is
/// stored into the one created before it. \p Addr is cast to the element type
/// of the innermost temporary and stored there; the outermost temporary is
/// returned. When no level exists, \p Addr is cast to \p BaseLVType and
/// returned directly with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Most recently created temporary.
  Address TopTmp = Address::invalid();     // Temporary from the previous level.
  Address MostTopTmp = Address::invalid(); // First (outermost) temporary.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to the temporary of this level.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
982 
983 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
984   const VarDecl *OrigVD = nullptr;
985   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
986     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
987     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
988       Base = TempOASE->getBase()->IgnoreParenImpCasts();
989     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
990       Base = TempASE->getBase()->IgnoreParenImpCasts();
991     DE = cast<DeclRefExpr>(Base);
992     OrigVD = cast<VarDecl>(DE->getDecl());
993   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
994     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
995     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
996       Base = TempASE->getBase()->IgnoreParenImpCasts();
997     DE = cast<DeclRefExpr>(Base);
998     OrigVD = cast<VarDecl>(DE->getDecl());
999   }
1000   return OrigVD;
1001 }
1002 
/// Adjusts the private address of reduction item \p N so that it can stand in
/// for the base variable of an array section or array subscript reference,
/// and records the base declaration in BaseDecls.
///
/// When the reference has a base variable (see getBaseDecl), the pointer
/// difference between the start of the base and the first element of the
/// shared item is applied to the private pointer, and the result is wrapped
/// by castToBase. Otherwise the reference must be a plain DeclRefExpr and
/// \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through pointers/references of the base until the shared item's
    // type is reached.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the base start relative to the shared item's first element.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1028 
1029 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1030   const OMPDeclareReductionDecl *DRD =
1031       getReductionInit(ClausesData[N].ReductionOp);
1032   return DRD && DRD->getInitializer();
1033 }
1034 
1035 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1036   return CGF.EmitLoadOfPointerLValue(
1037       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1038       getThreadIDVariable()->getType()->castAs<PointerType>());
1039 }
1040 
/// Emits the body of the OpenMP region by invoking the stored CodeGen
/// callback inside a terminate scope.
///
/// Does nothing when \p CGF has no insert point. When \p S is non-null its
/// profile counter is incremented before the body is generated.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // The terminate scope pushed here is popped again right after the body.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1055 
1056 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1057     CodeGenFunction &CGF) {
1058   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1059                             getThreadIDVariable()->getType(),
1060                             AlignmentSource::Decl);
1061 }
1062 
1063 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1064                                        QualType FieldTy) {
1065   auto *Field = FieldDecl::Create(
1066       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1067       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1068       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1069   Field->setAccess(AS_public);
1070   DC->addDecl(Field);
1071   return Field;
1072 }
1073 
/// Constructs the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Separator placed before the first part of a name
///                       built by getName().
/// \param Separator      Separator placed before every following part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1084 
1085 void CGOpenMPRuntime::clear() {
1086   InternalVars.clear();
1087   // Clean non-target variable declarations possibly used only in debug info.
1088   for (const auto &Data : EmittedNonTargetVariables) {
1089     if (!Data.getValue().pointsToAliveValue())
1090       continue;
1091     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1092     if (!GV)
1093       continue;
1094     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1095       continue;
1096     GV->eraseFromParent();
1097   }
1098 }
1099 
1100 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1101   SmallString<128> Buffer;
1102   llvm::raw_svector_ostream OS(Buffer);
1103   StringRef Sep = FirstSeparator;
1104   for (StringRef Part : Parts) {
1105     OS << Sep << Part;
1106     Sep = Separator;
1107   }
1108   return std::string(OS.str());
1109 }
1110 
/// Emits an internal helper function for a user-defined reduction: either the
/// combiner or the initializer, selected by \p IsCombiner.
///
/// \param Ty                  Reduction type; both parameters are restrict
///                            pointers to it.
/// \param CombinerInitializer Expression emitted (result ignored) as the body
///                            of the helper; may be null.
/// \param In                  Variable mapped to the pointee of the second
///                            parameter.
/// \param Out                 Variable mapped to the pointee of the first
///                            parameter.
/// \param IsCombiner          Selects the name ("omp_combiner" vs.
///                            "omp_initializer"); when false, a non-trivial
///                            initializer of \p Out is emitted first.
/// \returns the newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note: the "out" parameter is pushed first, the "in" parameter second.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // With optimization enabled, force the helper to be inlined.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first emit the non-trivial initializer of the "out"
  // variable itself (if any) into its privatized storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1167 
1168 void CGOpenMPRuntime::emitUserDefinedReduction(
1169     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1170   if (UDRMap.count(D) > 0)
1171     return;
1172   llvm::Function *Combiner = emitCombinerOrInitializer(
1173       CGM, D->getType(), D->getCombiner(),
1174       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1175       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1176       /*IsCombiner=*/true);
1177   llvm::Function *Initializer = nullptr;
1178   if (const Expr *Init = D->getInitializer()) {
1179     Initializer = emitCombinerOrInitializer(
1180         CGM, D->getType(),
1181         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1182                                                                      : nullptr,
1183         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1184         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1185         /*IsCombiner=*/false);
1186   }
1187   UDRMap.try_emplace(D, Combiner, Initializer);
1188   if (CGF) {
1189     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1190     Decls.second.push_back(D);
1191   }
1192 }
1193 
1194 std::pair<llvm::Function *, llvm::Function *>
1195 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1196   auto I = UDRMap.find(D);
1197   if (I != UDRMap.end())
1198     return I->second;
1199   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1200   return UDRMap.lookup(D);
1201 }
1202 
1203 namespace {
1204 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1205 // Builder if one is present.
1206 struct PushAndPopStackRAII {
1207   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1208                       bool HasCancel, llvm::omp::Directive Kind)
1209       : OMPBuilder(OMPBuilder) {
1210     if (!OMPBuilder)
1211       return;
1212 
1213     // The following callback is the crucial part of clangs cleanup process.
1214     //
1215     // NOTE:
1216     // Once the OpenMPIRBuilder is used to create parallel regions (and
1217     // similar), the cancellation destination (Dest below) is determined via
1218     // IP. That means if we have variables to finalize we split the block at IP,
1219     // use the new block (=BB) as destination to build a JumpDest (via
1220     // getJumpDestInCurrentScope(BB)) which then is fed to
1221     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1222     // to push & pop an FinalizationInfo object.
1223     // The FiniCB will still be needed but at the point where the
1224     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1225     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1226       assert(IP.getBlock()->end() == IP.getPoint() &&
1227              "Clang CG should cause non-terminated block!");
1228       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1229       CGF.Builder.restoreIP(IP);
1230       CodeGenFunction::JumpDest Dest =
1231           CGF.getOMPCancelDestination(OMPD_parallel);
1232       CGF.EmitBranchThroughCleanup(Dest);
1233     };
1234 
1235     // TODO: Remove this once we emit parallel regions through the
1236     //       OpenMPIRBuilder as it can do this setup internally.
1237     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1238     OMPBuilder->pushFinalizationCB(std::move(FI));
1239   }
1240   ~PushAndPopStackRAII() {
1241     if (OMPBuilder)
1242       OMPBuilder->popFinalizationCB();
1243   }
1244   llvm::OpenMPIRBuilder *OMPBuilder;
1245 };
1246 } // namespace
1247 
1248 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1249     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1250     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1251     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1252   assert(ThreadIDVar->getType()->isPointerType() &&
1253          "thread id variable must be of type kmp_int32 *");
1254   CodeGenFunction CGF(CGM, true);
1255   bool HasCancel = false;
1256   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1257     HasCancel = OPD->hasCancel();
1258   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1259     HasCancel = OPD->hasCancel();
1260   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1261     HasCancel = OPSD->hasCancel();
1262   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1263     HasCancel = OPFD->hasCancel();
1264   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1265     HasCancel = OPFD->hasCancel();
1266   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1267     HasCancel = OPFD->hasCancel();
1268   else if (const auto *OPFD =
1269                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD =
1272                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1273     HasCancel = OPFD->hasCancel();
1274 
1275   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1276   //       parallel region to make cancellation barriers work properly.
1277   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1278   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1279   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1280                                     HasCancel, OutlinedHelperName);
1281   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1282   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1283 }
1284 
1285 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1286     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1288   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1289   return emitParallelOrTeamsOutlinedFunction(
1290       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1291 }
1292 
1293 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1294     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1295     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1296   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1297   return emitParallelOrTeamsOutlinedFunction(
1298       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1299 }
1300 
/// Emits the outlined function for the captured statement of a task or
/// taskloop directive \p D.
///
/// \param ThreadIDVar   Thread id variable; must not have pointer type.
/// \param PartIDVar     Part id variable handed to the untied task action.
/// \param TaskTVar      Variable holding a pointer that is loaded and passed
///                      as the last argument of the __kmpc_omp_task call
///                      emitted by the untied code generation callback.
/// \param InnermostKind Innermost directive kind forwarded to the region info.
/// \param CodeGen       Region body generator; the untied task action is
///                      attached to it.
/// \param Tied          Whether the task is tied.
/// \param NumberOfParts Written only when \p Tied is false; receives the
///                      number of parts reported by the untied task action.
/// \returns the generated outlined function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback used by the untied task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture under OMPD_taskloop, all others under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds checked below can report a cancel construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1347 
1348 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1349                              const RecordDecl *RD, const CGRecordLayout &RL,
1350                              ArrayRef<llvm::Constant *> Data) {
1351   llvm::StructType *StructTy = RL.getLLVMType();
1352   unsigned PrevIdx = 0;
1353   ConstantInitBuilder CIBuilder(CGM);
1354   auto DI = Data.begin();
1355   for (const FieldDecl *FD : RD->fields()) {
1356     unsigned Idx = RL.getLLVMFieldNo(FD);
1357     // Fill the alignment.
1358     for (unsigned I = PrevIdx; I < Idx; ++I)
1359       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1360     PrevIdx = Idx + 1;
1361     Fields.add(*DI);
1362     ++DI;
1363   }
1364 }
1365 
1366 template <class... As>
1367 static llvm::GlobalVariable *
1368 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1369                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1370                    As &&... Args) {
1371   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1372   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1373   ConstantInitBuilder CIBuilder(CGM);
1374   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1375   buildStructValue(Fields, CGM, RD, RL, Data);
1376   return Fields.finishAndCreateGlobal(
1377       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1378       std::forward<As>(Args)...);
1379 }
1380 
1381 template <typename T>
1382 static void
1383 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1384                                          ArrayRef<llvm::Constant *> Data,
1385                                          T &Parent) {
1386   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1387   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1388   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1389   buildStructValue(Fields, CGM, RD, RL, Data);
1390   Fields.finishAndAddTo(Parent);
1391 }
1392 
1393 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1394                                              bool AtCurrentPoint) {
1395   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1396   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1397 
1398   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1399   if (AtCurrentPoint) {
1400     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1401         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1402   } else {
1403     Elem.second.ServiceInsertPt =
1404         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1405     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1406   }
1407 }
1408 
1409 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1410   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1411   if (Elem.second.ServiceInsertPt) {
1412     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1413     Elem.second.ServiceInsertPt = nullptr;
1414     Ptr->eraseFromParent();
1415   }
1416 }
1417 
1418 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1419                                                   SourceLocation Loc,
1420                                                   SmallString<128> &Buffer) {
1421   llvm::raw_svector_ostream OS(Buffer);
1422   // Build debug location
1423   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1424   OS << ";" << PLoc.getFilename() << ";";
1425   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1426     OS << FD->getQualifiedNameAsString();
1427   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1428   return OS.str();
1429 }
1430 
1431 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1432                                                  SourceLocation Loc,
1433                                                  unsigned Flags) {
1434   llvm::Constant *SrcLocStr;
1435   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1436       Loc.isInvalid()) {
1437     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1438   } else {
1439     std::string FunctionName = "";
1440     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1441       FunctionName = FD->getQualifiedNameAsString();
1442     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1443     const char *FileName = PLoc.getFilename();
1444     unsigned Line = PLoc.getLine();
1445     unsigned Column = PLoc.getColumn();
1446     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1447                                                 Line, Column);
1448   }
1449   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1450   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1451                                      Reserved2Flags);
1452 }
1453 
1454 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1455                                           SourceLocation Loc) {
1456   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1457   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1458   // the clang invariants used below might be broken.
1459   if (CGM.getLangOpts().OpenMPIRBuilder) {
1460     SmallString<128> Buffer;
1461     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1462     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1463         getIdentStringFromSourceLocation(CGF, Loc, Buffer));
1464     return OMPBuilder.getOrCreateThreadID(
1465         OMPBuilder.getOrCreateIdent(SrcLocStr));
1466   }
1467 
1468   llvm::Value *ThreadID = nullptr;
1469   // Check whether we've already cached a load of the thread id in this
1470   // function.
1471   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1472   if (I != OpenMPLocThreadIDMap.end()) {
1473     ThreadID = I->second.ThreadID;
1474     if (ThreadID != nullptr)
1475       return ThreadID;
1476   }
1477   // If exceptions are enabled, do not use parameter to avoid possible crash.
1478   if (auto *OMPRegionInfo =
1479           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1480     if (OMPRegionInfo->getThreadIDVariable()) {
1481       // Check if this an outlined function with thread id passed as argument.
1482       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1483       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1484       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1485           !CGF.getLangOpts().CXXExceptions ||
1486           CGF.Builder.GetInsertBlock() == TopBlock ||
1487           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1488           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1489               TopBlock ||
1490           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1491               CGF.Builder.GetInsertBlock()) {
1492         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1493         // If value loaded in entry block, cache it and use it everywhere in
1494         // function.
1495         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1496           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1497           Elem.second.ThreadID = ThreadID;
1498         }
1499         return ThreadID;
1500       }
1501     }
1502   }
1503 
1504   // This is not an outlined function region - need to call __kmpc_int32
1505   // kmpc_global_thread_num(ident_t *loc).
1506   // Generate thread id value and cache this value for use across the
1507   // function.
1508   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509   if (!Elem.second.ServiceInsertPt)
1510     setLocThreadIdInsertPt(CGF);
1511   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1512   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1513   llvm::CallInst *Call = CGF.Builder.CreateCall(
1514       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1515                                             OMPRTL___kmpc_global_thread_num),
1516       emitUpdateLocation(CGF, Loc));
1517   Call->setCallingConv(CGF.getRuntimeCC());
1518   Elem.second.ThreadID = Call;
1519   return Call;
1520 }
1521 
1522 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525     clearLocThreadIdInsertPt(CGF);
1526     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527   }
1528   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1530       UDRMap.erase(D);
1531     FunctionUDRMap.erase(CGF.CurFn);
1532   }
1533   auto I = FunctionUDMMap.find(CGF.CurFn);
1534   if (I != FunctionUDMMap.end()) {
1535     for(const auto *D : I->second)
1536       UDMMap.erase(D);
1537     FunctionUDMMap.erase(I);
1538   }
1539   LastprivateConditionalToTypes.erase(CGF.CurFn);
1540   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541 }
1542 
1543 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1544   return OMPBuilder.IdentPtr;
1545 }
1546 
1547 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548   if (!Kmpc_MicroTy) {
1549     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553   }
1554   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555 }
1556 
1557 llvm::FunctionCallee
1558 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1559   assert((IVSize == 32 || IVSize == 64) &&
1560          "IV size is not compatible with the omp runtime");
1561   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1562                                             : "__kmpc_for_static_init_4u")
1563                                 : (IVSigned ? "__kmpc_for_static_init_8"
1564                                             : "__kmpc_for_static_init_8u");
1565   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1566   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1567   llvm::Type *TypeParams[] = {
1568     getIdentTyPointerTy(),                     // loc
1569     CGM.Int32Ty,                               // tid
1570     CGM.Int32Ty,                               // schedtype
1571     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1572     PtrTy,                                     // p_lower
1573     PtrTy,                                     // p_upper
1574     PtrTy,                                     // p_stride
1575     ITy,                                       // incr
1576     ITy                                        // chunk
1577   };
1578   auto *FnTy =
1579       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1580   return CGM.CreateRuntimeFunction(FnTy, Name);
1581 }
1582 
1583 llvm::FunctionCallee
1584 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1585   assert((IVSize == 32 || IVSize == 64) &&
1586          "IV size is not compatible with the omp runtime");
1587   StringRef Name =
1588       IVSize == 32
1589           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1590           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1591   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1592   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1593                                CGM.Int32Ty,           // tid
1594                                CGM.Int32Ty,           // schedtype
1595                                ITy,                   // lower
1596                                ITy,                   // upper
1597                                ITy,                   // stride
1598                                ITy                    // chunk
1599   };
1600   auto *FnTy =
1601       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1602   return CGM.CreateRuntimeFunction(FnTy, Name);
1603 }
1604 
1605 llvm::FunctionCallee
1606 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1607   assert((IVSize == 32 || IVSize == 64) &&
1608          "IV size is not compatible with the omp runtime");
1609   StringRef Name =
1610       IVSize == 32
1611           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1612           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1613   llvm::Type *TypeParams[] = {
1614       getIdentTyPointerTy(), // loc
1615       CGM.Int32Ty,           // tid
1616   };
1617   auto *FnTy =
1618       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1619   return CGM.CreateRuntimeFunction(FnTy, Name);
1620 }
1621 
1622 llvm::FunctionCallee
1623 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1624   assert((IVSize == 32 || IVSize == 64) &&
1625          "IV size is not compatible with the omp runtime");
1626   StringRef Name =
1627       IVSize == 32
1628           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1629           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1630   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1631   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1632   llvm::Type *TypeParams[] = {
1633     getIdentTyPointerTy(),                     // loc
1634     CGM.Int32Ty,                               // tid
1635     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1636     PtrTy,                                     // p_lower
1637     PtrTy,                                     // p_upper
1638     PtrTy                                      // p_stride
1639   };
1640   auto *FnTy =
1641       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1642   return CGM.CreateRuntimeFunction(FnTy, Name);
1643 }
1644 
1645 /// Obtain information that uniquely identifies a target entry. This
1646 /// consists of the file and device IDs as well as line number associated with
1647 /// the relevant entry source location.
1648 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1649                                      unsigned &DeviceID, unsigned &FileID,
1650                                      unsigned &LineNum) {
1651   SourceManager &SM = C.getSourceManager();
1652 
1653   // The loc should be always valid and have a file ID (the user cannot use
1654   // #pragma directives in macros)
1655 
1656   assert(Loc.isValid() && "Source location is expected to be always valid.");
1657 
1658   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1659   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1660 
1661   llvm::sys::fs::UniqueID ID;
1662   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1663     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1664     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1666       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1667           << PLoc.getFilename() << EC.message();
1668   }
1669 
1670   DeviceID = ID.getDevice();
1671   FileID = ID.getFile();
1672   LineNum = PLoc.getLine();
1673 }
1674 
1675 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1676   if (CGM.getLangOpts().OpenMPSimd)
1677     return Address::invalid();
1678   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1679       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1680   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1681               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1682                HasRequiresUnifiedSharedMemory))) {
1683     SmallString<64> PtrName;
1684     {
1685       llvm::raw_svector_ostream OS(PtrName);
1686       OS << CGM.getMangledName(GlobalDecl(VD));
1687       if (!VD->isExternallyVisible()) {
1688         unsigned DeviceID, FileID, Line;
1689         getTargetEntryUniqueInfo(CGM.getContext(),
1690                                  VD->getCanonicalDecl()->getBeginLoc(),
1691                                  DeviceID, FileID, Line);
1692         OS << llvm::format("_%x", FileID);
1693       }
1694       OS << "_decl_tgt_ref_ptr";
1695     }
1696     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1697     if (!Ptr) {
1698       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1699       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1700                                         PtrName);
1701 
1702       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1703       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1704 
1705       if (!CGM.getLangOpts().OpenMPIsDevice)
1706         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1707       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1708     }
1709     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1710   }
1711   return Address::invalid();
1712 }
1713 
1714 llvm::Constant *
1715 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1716   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1717          !CGM.getContext().getTargetInfo().isTLSSupported());
1718   // Lookup the entry, lazily creating it if necessary.
1719   std::string Suffix = getName({"cache", ""});
1720   return getOrCreateInternalVariable(
1721       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1722 }
1723 
1724 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1725                                                 const VarDecl *VD,
1726                                                 Address VDAddr,
1727                                                 SourceLocation Loc) {
1728   if (CGM.getLangOpts().OpenMPUseTLS &&
1729       CGM.getContext().getTargetInfo().isTLSSupported())
1730     return VDAddr;
1731 
1732   llvm::Type *VarTy = VDAddr.getElementType();
1733   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1734                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1735                                                        CGM.Int8PtrTy),
1736                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1737                          getOrCreateThreadPrivateCache(VD)};
1738   return Address(CGF.EmitRuntimeCall(
1739                      OMPBuilder.getOrCreateRuntimeFunction(
1740                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1741                      Args),
1742                  VDAddr.getAlignment());
1743 }
1744 
1745 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1746     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1747     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1748   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1749   // library.
1750   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1751   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1752                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1753                       OMPLoc);
1754   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1755   // to register constructor/destructor for variable.
1756   llvm::Value *Args[] = {
1757       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1758       Ctor, CopyCtor, Dtor};
1759   CGF.EmitRuntimeCall(
1760       OMPBuilder.getOrCreateRuntimeFunction(
1761           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1762       Args);
1763 }
1764 
1765 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1766     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1767     bool PerformInit, CodeGenFunction *CGF) {
1768   if (CGM.getLangOpts().OpenMPUseTLS &&
1769       CGM.getContext().getTargetInfo().isTLSSupported())
1770     return nullptr;
1771 
1772   VD = VD->getDefinition(CGM.getContext());
1773   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1774     QualType ASTTy = VD->getType();
1775 
1776     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1777     const Expr *Init = VD->getAnyInitializer();
1778     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1779       // Generate function that re-emits the declaration's initializer into the
1780       // threadprivate copy of the variable VD
1781       CodeGenFunction CtorCGF(CGM);
1782       FunctionArgList Args;
1783       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1784                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1785                             ImplicitParamDecl::Other);
1786       Args.push_back(&Dst);
1787 
1788       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1789           CGM.getContext().VoidPtrTy, Args);
1790       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1791       std::string Name = getName({"__kmpc_global_ctor_", ""});
1792       llvm::Function *Fn =
1793           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1794       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1795                             Args, Loc, Loc);
1796       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1797           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1798           CGM.getContext().VoidPtrTy, Dst.getLocation());
1799       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1800       Arg = CtorCGF.Builder.CreateElementBitCast(
1801           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1802       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1803                                /*IsInitializer=*/true);
1804       ArgVal = CtorCGF.EmitLoadOfScalar(
1805           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1806           CGM.getContext().VoidPtrTy, Dst.getLocation());
1807       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1808       CtorCGF.FinishFunction();
1809       Ctor = Fn;
1810     }
1811     if (VD->getType().isDestructedType() != QualType::DK_none) {
1812       // Generate function that emits destructor call for the threadprivate copy
1813       // of the variable VD
1814       CodeGenFunction DtorCGF(CGM);
1815       FunctionArgList Args;
1816       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1817                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1818                             ImplicitParamDecl::Other);
1819       Args.push_back(&Dst);
1820 
1821       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1822           CGM.getContext().VoidTy, Args);
1823       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1824       std::string Name = getName({"__kmpc_global_dtor_", ""});
1825       llvm::Function *Fn =
1826           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1827       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1828       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1829                             Loc, Loc);
1830       // Create a scope with an artificial location for the body of this function.
1831       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1832       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1833           DtorCGF.GetAddrOfLocalVar(&Dst),
1834           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1835       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1836                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1837                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1838       DtorCGF.FinishFunction();
1839       Dtor = Fn;
1840     }
1841     // Do not emit init function if it is not required.
1842     if (!Ctor && !Dtor)
1843       return nullptr;
1844 
1845     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1846     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1847                                                /*isVarArg=*/false)
1848                            ->getPointerTo();
1849     // Copying constructor for the threadprivate variable.
1850     // Must be NULL - reserved by runtime, but currently it requires that this
1851     // parameter is always NULL. Otherwise it fires assertion.
1852     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1853     if (Ctor == nullptr) {
1854       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1855                                              /*isVarArg=*/false)
1856                          ->getPointerTo();
1857       Ctor = llvm::Constant::getNullValue(CtorTy);
1858     }
1859     if (Dtor == nullptr) {
1860       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1861                                              /*isVarArg=*/false)
1862                          ->getPointerTo();
1863       Dtor = llvm::Constant::getNullValue(DtorTy);
1864     }
1865     if (!CGF) {
1866       auto *InitFunctionTy =
1867           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1868       std::string Name = getName({"__omp_threadprivate_init_", ""});
1869       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1870           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1871       CodeGenFunction InitCGF(CGM);
1872       FunctionArgList ArgList;
1873       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1874                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1875                             Loc, Loc);
1876       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1877       InitCGF.FinishFunction();
1878       return InitFunction;
1879     }
1880     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1881   }
1882   return nullptr;
1883 }
1884 
1885 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1886                                                      llvm::GlobalVariable *Addr,
1887                                                      bool PerformInit) {
1888   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1889       !CGM.getLangOpts().OpenMPIsDevice)
1890     return false;
1891   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1892       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1893   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1894       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1895        HasRequiresUnifiedSharedMemory))
1896     return CGM.getLangOpts().OpenMPIsDevice;
1897   VD = VD->getDefinition(CGM.getContext());
1898   assert(VD && "Unknown VarDecl");
1899 
1900   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1901     return CGM.getLangOpts().OpenMPIsDevice;
1902 
1903   QualType ASTTy = VD->getType();
1904   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1905 
1906   // Produce the unique prefix to identify the new target regions. We use
1907   // the source location of the variable declaration which we know to not
1908   // conflict with any target region.
1909   unsigned DeviceID;
1910   unsigned FileID;
1911   unsigned Line;
1912   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1913   SmallString<128> Buffer, Out;
1914   {
1915     llvm::raw_svector_ostream OS(Buffer);
1916     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1917        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1918   }
1919 
1920   const Expr *Init = VD->getAnyInitializer();
1921   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1922     llvm::Constant *Ctor;
1923     llvm::Constant *ID;
1924     if (CGM.getLangOpts().OpenMPIsDevice) {
1925       // Generate function that re-emits the declaration's initializer into
1926       // the threadprivate copy of the variable VD
1927       CodeGenFunction CtorCGF(CGM);
1928 
1929       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1930       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1931       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1932           FTy, Twine(Buffer, "_ctor"), FI, Loc);
1933       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1934       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1935                             FunctionArgList(), Loc, Loc);
1936       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1937       CtorCGF.EmitAnyExprToMem(Init,
1938                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
1939                                Init->getType().getQualifiers(),
1940                                /*IsInitializer=*/true);
1941       CtorCGF.FinishFunction();
1942       Ctor = Fn;
1943       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1944       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1945     } else {
1946       Ctor = new llvm::GlobalVariable(
1947           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1948           llvm::GlobalValue::PrivateLinkage,
1949           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1950       ID = Ctor;
1951     }
1952 
1953     // Register the information for the entry associated with the constructor.
1954     Out.clear();
1955     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1956         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1957         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1958   }
1959   if (VD->getType().isDestructedType() != QualType::DK_none) {
1960     llvm::Constant *Dtor;
1961     llvm::Constant *ID;
1962     if (CGM.getLangOpts().OpenMPIsDevice) {
1963       // Generate function that emits destructor call for the threadprivate
1964       // copy of the variable VD
1965       CodeGenFunction DtorCGF(CGM);
1966 
1967       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1968       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1969       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1970           FTy, Twine(Buffer, "_dtor"), FI, Loc);
1971       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1972       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1973                             FunctionArgList(), Loc, Loc);
1974       // Create a scope with an artificial location for the body of this
1975       // function.
1976       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1977       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
1978                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1979                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1980       DtorCGF.FinishFunction();
1981       Dtor = Fn;
1982       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1983       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
1984     } else {
1985       Dtor = new llvm::GlobalVariable(
1986           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1987           llvm::GlobalValue::PrivateLinkage,
1988           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1989       ID = Dtor;
1990     }
1991     // Register the information for the entry associated with the destructor.
1992     Out.clear();
1993     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1994         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1995         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1996   }
1997   return CGM.getLangOpts().OpenMPIsDevice;
1998 }
1999 
2000 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2001                                                           QualType VarType,
2002                                                           StringRef Name) {
2003   std::string Suffix = getName({"artificial", ""});
2004   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2005   llvm::Value *GAddr =
2006       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2007   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2008       CGM.getTarget().isTLSSupported()) {
2009     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2010     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2011   }
2012   std::string CacheSuffix = getName({"cache", ""});
2013   llvm::Value *Args[] = {
2014       emitUpdateLocation(CGF, SourceLocation()),
2015       getThreadID(CGF, SourceLocation()),
2016       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2017       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2018                                 /*isSigned=*/false),
2019       getOrCreateInternalVariable(
2020           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2021   return Address(
2022       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2023           CGF.EmitRuntimeCall(
2024               OMPBuilder.getOrCreateRuntimeFunction(
2025                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2026               Args),
2027           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2028       CGM.getContext().getTypeAlignInChars(VarType));
2029 }
2030 
2031 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2032                                    const RegionCodeGenTy &ThenGen,
2033                                    const RegionCodeGenTy &ElseGen) {
2034   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2035 
2036   // If the condition constant folds and can be elided, try to avoid emitting
2037   // the condition and the dead arm of the if/else.
2038   bool CondConstant;
2039   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2040     if (CondConstant)
2041       ThenGen(CGF);
2042     else
2043       ElseGen(CGF);
2044     return;
2045   }
2046 
2047   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2048   // emit the conditional branch.
2049   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2050   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2051   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2052   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2053 
2054   // Emit the 'then' code.
2055   CGF.EmitBlock(ThenBlock);
2056   ThenGen(CGF);
2057   CGF.EmitBranch(ContBlock);
2058   // Emit the 'else' code if present.
2059   // There is no need to emit line number for unconditional branch.
2060   (void)ApplyDebugLocation::CreateEmpty(CGF);
2061   CGF.EmitBlock(ElseBlock);
2062   ElseGen(CGF);
2063   // There is no need to emit line number for unconditional branch.
2064   (void)ApplyDebugLocation::CreateEmpty(CGF);
2065   CGF.EmitBranch(ContBlock);
2066   // Emit the continuation block for code after the if.
2067   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2068 }
2069 
2070 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2071                                        llvm::Function *OutlinedFn,
2072                                        ArrayRef<llvm::Value *> CapturedVars,
2073                                        const Expr *IfCond) {
2074   if (!CGF.HaveInsertPoint())
2075     return;
2076   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2077   auto &M = CGM.getModule();
2078   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2079                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2080     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *Args[] = {
2083         RTLoc,
2084         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2085         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2086     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2087     RealArgs.append(std::begin(Args), std::end(Args));
2088     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2089 
2090     llvm::FunctionCallee RTLFn =
2091         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2092     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2093   };
2094   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2095                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2096     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2097     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2098     // Build calls:
2099     // __kmpc_serialized_parallel(&Loc, GTid);
2100     llvm::Value *Args[] = {RTLoc, ThreadID};
2101     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2102                             M, OMPRTL___kmpc_serialized_parallel),
2103                         Args);
2104 
2105     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2106     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2107     Address ZeroAddrBound =
2108         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2109                                          /*Name=*/".bound.zero.addr");
2110     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2111     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2112     // ThreadId for serialized parallels is 0.
2113     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2114     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2115     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2116 
2117     // Ensure we do not inline the function. This is trivially true for the ones
2118     // passed to __kmpc_fork_call but the ones calles in serialized regions
2119     // could be inlined. This is not a perfect but it is closer to the invariant
2120     // we want, namely, every data environment starts with a new function.
2121     // TODO: We should pass the if condition to the runtime function and do the
2122     //       handling there. Much cleaner code.
2123     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2124     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2125 
2126     // __kmpc_end_serialized_parallel(&Loc, GTid);
2127     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2128     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2129                             M, OMPRTL___kmpc_end_serialized_parallel),
2130                         EndArgs);
2131   };
2132   if (IfCond) {
2133     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2134   } else {
2135     RegionCodeGenTy ThenRCG(ThenGen);
2136     ThenRCG(CGF);
2137   }
2138 }
2139 
2140 // If we're inside an (outlined) parallel region, use the region info's
2141 // thread-ID variable (it is passed in a first argument of the outlined function
2142 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2143 // regular serial code region, get thread ID by calling kmp_int32
2144 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2145 // return the address of that temp.
2146 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2147                                              SourceLocation Loc) {
2148   if (auto *OMPRegionInfo =
2149           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2150     if (OMPRegionInfo->getThreadIDVariable())
2151       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2152 
2153   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2154   QualType Int32Ty =
2155       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2156   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2157   CGF.EmitStoreOfScalar(ThreadID,
2158                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2159 
2160   return ThreadIDTemp;
2161 }
2162 
2163 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2164     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2165   SmallString<256> Buffer;
2166   llvm::raw_svector_ostream Out(Buffer);
2167   Out << Name;
2168   StringRef RuntimeName = Out.str();
2169   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2170   if (Elem.second) {
2171     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2172            "OMP internal variable has different type than requested");
2173     return &*Elem.second;
2174   }
2175 
2176   return Elem.second = new llvm::GlobalVariable(
2177              CGM.getModule(), Ty, /*IsConstant*/ false,
2178              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2179              Elem.first(), /*InsertBefore=*/nullptr,
2180              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2181 }
2182 
2183 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2184   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2185   std::string Name = getName({Prefix, "var"});
2186   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2187 }
2188 
2189 namespace {
2190 /// Common pre(post)-action for different OpenMP constructs.
2191 class CommonActionTy final : public PrePostActionTy {
2192   llvm::FunctionCallee EnterCallee;
2193   ArrayRef<llvm::Value *> EnterArgs;
2194   llvm::FunctionCallee ExitCallee;
2195   ArrayRef<llvm::Value *> ExitArgs;
2196   bool Conditional;
2197   llvm::BasicBlock *ContBlock = nullptr;
2198 
2199 public:
2200   CommonActionTy(llvm::FunctionCallee EnterCallee,
2201                  ArrayRef<llvm::Value *> EnterArgs,
2202                  llvm::FunctionCallee ExitCallee,
2203                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2204       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2205         ExitArgs(ExitArgs), Conditional(Conditional) {}
2206   void Enter(CodeGenFunction &CGF) override {
2207     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2208     if (Conditional) {
2209       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2210       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2211       ContBlock = CGF.createBasicBlock("omp_if.end");
2212       // Generate the branch (If-stmt)
2213       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2214       CGF.EmitBlock(ThenBlock);
2215     }
2216   }
2217   void Done(CodeGenFunction &CGF) {
2218     // Emit the rest of blocks/branches
2219     CGF.EmitBranch(ContBlock);
2220     CGF.EmitBlock(ContBlock, true);
2221   }
2222   void Exit(CodeGenFunction &CGF) override {
2223     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2224   }
2225 };
2226 } // anonymous namespace
2227 
2228 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2229                                          StringRef CriticalName,
2230                                          const RegionCodeGenTy &CriticalOpGen,
2231                                          SourceLocation Loc, const Expr *Hint) {
2232   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2233   // CriticalOpGen();
2234   // __kmpc_end_critical(ident_t *, gtid, Lock);
2235   // Prepare arguments and build a call to __kmpc_critical
2236   if (!CGF.HaveInsertPoint())
2237     return;
2238   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2239                          getCriticalRegionLock(CriticalName)};
2240   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2241                                                 std::end(Args));
2242   if (Hint) {
2243     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2244         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2245   }
2246   CommonActionTy Action(
2247       OMPBuilder.getOrCreateRuntimeFunction(
2248           CGM.getModule(),
2249           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2250       EnterArgs,
2251       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2252                                             OMPRTL___kmpc_end_critical),
2253       Args);
2254   CriticalOpGen.setAction(Action);
2255   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2256 }
2257 
2258 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2259                                        const RegionCodeGenTy &MasterOpGen,
2260                                        SourceLocation Loc) {
2261   if (!CGF.HaveInsertPoint())
2262     return;
2263   // if(__kmpc_master(ident_t *, gtid)) {
2264   //   MasterOpGen();
2265   //   __kmpc_end_master(ident_t *, gtid);
2266   // }
2267   // Prepare arguments and build a call to __kmpc_master
2268   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2269   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_master),
2271                         Args,
2272                         OMPBuilder.getOrCreateRuntimeFunction(
2273                             CGM.getModule(), OMPRTL___kmpc_end_master),
2274                         Args,
2275                         /*Conditional=*/true);
2276   MasterOpGen.setAction(Action);
2277   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2278   Action.Done(CGF);
2279 }
2280 
2281 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2282                                        const RegionCodeGenTy &MaskedOpGen,
2283                                        SourceLocation Loc, const Expr *Filter) {
2284   if (!CGF.HaveInsertPoint())
2285     return;
2286   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2287   //   MaskedOpGen();
2288   //   __kmpc_end_masked(iden_t *, gtid);
2289   // }
2290   // Prepare arguments and build a call to __kmpc_masked
2291   llvm::Value *FilterVal = Filter
2292                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2293                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2294   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2295                          FilterVal};
2296   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2297                             getThreadID(CGF, Loc)};
2298   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2299                             CGM.getModule(), OMPRTL___kmpc_masked),
2300                         Args,
2301                         OMPBuilder.getOrCreateRuntimeFunction(
2302                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2303                         ArgsEnd,
2304                         /*Conditional=*/true);
2305   MaskedOpGen.setAction(Action);
2306   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2307   Action.Done(CGF);
2308 }
2309 
2310 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2311                                         SourceLocation Loc) {
2312   if (!CGF.HaveInsertPoint())
2313     return;
2314   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2315     OMPBuilder.createTaskyield(CGF.Builder);
2316   } else {
2317     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2318     llvm::Value *Args[] = {
2319         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2320         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2321     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2322                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2323                         Args);
2324   }
2325 
2326   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2327     Region->emitUntiedSwitch(CGF);
2328 }
2329 
2330 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2331                                           const RegionCodeGenTy &TaskgroupOpGen,
2332                                           SourceLocation Loc) {
2333   if (!CGF.HaveInsertPoint())
2334     return;
2335   // __kmpc_taskgroup(ident_t *, gtid);
2336   // TaskgroupOpGen();
2337   // __kmpc_end_taskgroup(ident_t *, gtid);
2338   // Prepare arguments and build a call to __kmpc_taskgroup
2339   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2340   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2341                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2342                         Args,
2343                         OMPBuilder.getOrCreateRuntimeFunction(
2344                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2345                         Args);
2346   TaskgroupOpGen.setAction(Action);
2347   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2348 }
2349 
2350 /// Given an array of pointers to variables, project the address of a
2351 /// given variable.
2352 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2353                                       unsigned Index, const VarDecl *Var) {
2354   // Pull out the pointer to the variable.
2355   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2356   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2357 
2358   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2359   Addr = CGF.Builder.CreateElementBitCast(
2360       Addr, CGF.ConvertTypeForMem(Var->getType()));
2361   return Addr;
2362 }
2363 
2364 static llvm::Value *emitCopyprivateCopyFunction(
2365     CodeGenModule &CGM, llvm::Type *ArgsType,
2366     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2367     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2368     SourceLocation Loc) {
2369   ASTContext &C = CGM.getContext();
2370   // void copy_func(void *LHSArg, void *RHSArg);
2371   FunctionArgList Args;
2372   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2373                            ImplicitParamDecl::Other);
2374   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2375                            ImplicitParamDecl::Other);
2376   Args.push_back(&LHSArg);
2377   Args.push_back(&RHSArg);
2378   const auto &CGFI =
2379       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2380   std::string Name =
2381       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2382   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2383                                     llvm::GlobalValue::InternalLinkage, Name,
2384                                     &CGM.getModule());
2385   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2386   Fn->setDoesNotRecurse();
2387   CodeGenFunction CGF(CGM);
2388   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2389   // Dest = (void*[n])(LHSArg);
2390   // Src = (void*[n])(RHSArg);
2391   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2392       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2393       ArgsType), CGF.getPointerAlign());
2394   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2395       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2396       ArgsType), CGF.getPointerAlign());
2397   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2398   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2399   // ...
2400   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2401   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2402     const auto *DestVar =
2403         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2404     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2405 
2406     const auto *SrcVar =
2407         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2408     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2409 
2410     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2411     QualType Type = VD->getType();
2412     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2413   }
2414   CGF.FinishFunction();
2415   return Fn;
2416 }
2417 
2418 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2419                                        const RegionCodeGenTy &SingleOpGen,
2420                                        SourceLocation Loc,
2421                                        ArrayRef<const Expr *> CopyprivateVars,
2422                                        ArrayRef<const Expr *> SrcExprs,
2423                                        ArrayRef<const Expr *> DstExprs,
2424                                        ArrayRef<const Expr *> AssignmentOps) {
2425   if (!CGF.HaveInsertPoint())
2426     return;
2427   assert(CopyprivateVars.size() == SrcExprs.size() &&
2428          CopyprivateVars.size() == DstExprs.size() &&
2429          CopyprivateVars.size() == AssignmentOps.size());
2430   ASTContext &C = CGM.getContext();
2431   // int32 did_it = 0;
2432   // if(__kmpc_single(ident_t *, gtid)) {
2433   //   SingleOpGen();
2434   //   __kmpc_end_single(ident_t *, gtid);
2435   //   did_it = 1;
2436   // }
2437   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2438   // <copy_func>, did_it);
2439 
2440   Address DidIt = Address::invalid();
2441   if (!CopyprivateVars.empty()) {
2442     // int32 did_it = 0;
2443     QualType KmpInt32Ty =
2444         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2445     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2446     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2447   }
2448   // Prepare arguments and build a call to __kmpc_single
2449   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2450   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2451                             CGM.getModule(), OMPRTL___kmpc_single),
2452                         Args,
2453                         OMPBuilder.getOrCreateRuntimeFunction(
2454                             CGM.getModule(), OMPRTL___kmpc_end_single),
2455                         Args,
2456                         /*Conditional=*/true);
2457   SingleOpGen.setAction(Action);
2458   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2459   if (DidIt.isValid()) {
2460     // did_it = 1;
2461     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2462   }
2463   Action.Done(CGF);
2464   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2465   // <copy_func>, did_it);
2466   if (DidIt.isValid()) {
2467     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2468     QualType CopyprivateArrayTy = C.getConstantArrayType(
2469         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2470         /*IndexTypeQuals=*/0);
2471     // Create a list of all private variables for copyprivate.
2472     Address CopyprivateList =
2473         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2474     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2475       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2476       CGF.Builder.CreateStore(
2477           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2478               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2479               CGF.VoidPtrTy),
2480           Elem);
2481     }
2482     // Build function that copies private values from single region to all other
2483     // threads in the corresponding parallel region.
2484     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2485         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2486         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2487     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2488     Address CL =
2489       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2490                                                       CGF.VoidPtrTy);
2491     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2492     llvm::Value *Args[] = {
2493         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2494         getThreadID(CGF, Loc),        // i32 <gtid>
2495         BufSize,                      // size_t <buf_size>
2496         CL.getPointer(),              // void *<copyprivate list>
2497         CpyFn,                        // void (*) (void *, void *) <copy_func>
2498         DidItVal                      // i32 did_it
2499     };
2500     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2501                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2502                         Args);
2503   }
2504 }
2505 
2506 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2507                                         const RegionCodeGenTy &OrderedOpGen,
2508                                         SourceLocation Loc, bool IsThreads) {
2509   if (!CGF.HaveInsertPoint())
2510     return;
2511   // __kmpc_ordered(ident_t *, gtid);
2512   // OrderedOpGen();
2513   // __kmpc_end_ordered(ident_t *, gtid);
2514   // Prepare arguments and build a call to __kmpc_ordered
2515   if (IsThreads) {
2516     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2517     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2518                               CGM.getModule(), OMPRTL___kmpc_ordered),
2519                           Args,
2520                           OMPBuilder.getOrCreateRuntimeFunction(
2521                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2522                           Args);
2523     OrderedOpGen.setAction(Action);
2524     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2525     return;
2526   }
2527   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2528 }
2529 
2530 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2531   unsigned Flags;
2532   if (Kind == OMPD_for)
2533     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2534   else if (Kind == OMPD_sections)
2535     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2536   else if (Kind == OMPD_single)
2537     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2538   else if (Kind == OMPD_barrier)
2539     Flags = OMP_IDENT_BARRIER_EXPL;
2540   else
2541     Flags = OMP_IDENT_BARRIER_IMPL;
2542   return Flags;
2543 }
2544 
2545 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2546     CodeGenFunction &CGF, const OMPLoopDirective &S,
2547     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2548   // Check if the loop directive is actually a doacross loop directive. In this
2549   // case choose static, 1 schedule.
2550   if (llvm::any_of(
2551           S.getClausesOfKind<OMPOrderedClause>(),
2552           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2553     ScheduleKind = OMPC_SCHEDULE_static;
2554     // Chunk size is 1 in this case.
2555     llvm::APInt ChunkSize(32, 1);
2556     ChunkExpr = IntegerLiteral::Create(
2557         CGF.getContext(), ChunkSize,
2558         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2559         SourceLocation());
2560   }
2561 }
2562 
2563 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2564                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2565                                       bool ForceSimpleCall) {
2566   // Check if we should use the OMPBuilder
2567   auto *OMPRegionInfo =
2568       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2569   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2570     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2571         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2572     return;
2573   }
2574 
2575   if (!CGF.HaveInsertPoint())
2576     return;
2577   // Build call __kmpc_cancel_barrier(loc, thread_id);
2578   // Build call __kmpc_barrier(loc, thread_id);
2579   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2580   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2581   // thread_id);
2582   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2583                          getThreadID(CGF, Loc)};
2584   if (OMPRegionInfo) {
2585     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2586       llvm::Value *Result = CGF.EmitRuntimeCall(
2587           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2588                                                 OMPRTL___kmpc_cancel_barrier),
2589           Args);
2590       if (EmitChecks) {
2591         // if (__kmpc_cancel_barrier()) {
2592         //   exit from construct;
2593         // }
2594         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2595         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2596         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2597         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2598         CGF.EmitBlock(ExitBB);
2599         //   exit from construct;
2600         CodeGenFunction::JumpDest CancelDestination =
2601             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2602         CGF.EmitBranchThroughCleanup(CancelDestination);
2603         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2604       }
2605       return;
2606     }
2607   }
2608   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2609                           CGM.getModule(), OMPRTL___kmpc_barrier),
2610                       Args);
2611 }
2612 
2613 /// Map the OpenMP loop schedule to the runtime enumeration.
2614 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2615                                           bool Chunked, bool Ordered) {
2616   switch (ScheduleKind) {
2617   case OMPC_SCHEDULE_static:
2618     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2619                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2620   case OMPC_SCHEDULE_dynamic:
2621     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2622   case OMPC_SCHEDULE_guided:
2623     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2624   case OMPC_SCHEDULE_runtime:
2625     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2626   case OMPC_SCHEDULE_auto:
2627     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2628   case OMPC_SCHEDULE_unknown:
2629     assert(!Chunked && "chunk was specified but schedule kind not known");
2630     return Ordered ? OMP_ord_static : OMP_sch_static;
2631   }
2632   llvm_unreachable("Unexpected runtime schedule");
2633 }
2634 
2635 /// Map the OpenMP distribute schedule to the runtime enumeration.
2636 static OpenMPSchedType
2637 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2638   // only static is allowed for dist_schedule
2639   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2640 }
2641 
2642 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2643                                          bool Chunked) const {
2644   OpenMPSchedType Schedule =
2645       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2646   return Schedule == OMP_sch_static;
2647 }
2648 
2649 bool CGOpenMPRuntime::isStaticNonchunked(
2650     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2651   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2652   return Schedule == OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                       bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static_chunked;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticChunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static_chunked;
2666 }
2667 
2668 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2669   OpenMPSchedType Schedule =
2670       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2671   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2672   return Schedule != OMP_sch_static;
2673 }
2674 
2675 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2676                                   OpenMPScheduleClauseModifier M1,
2677                                   OpenMPScheduleClauseModifier M2) {
2678   int Modifier = 0;
2679   switch (M1) {
2680   case OMPC_SCHEDULE_MODIFIER_monotonic:
2681     Modifier = OMP_sch_modifier_monotonic;
2682     break;
2683   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2684     Modifier = OMP_sch_modifier_nonmonotonic;
2685     break;
2686   case OMPC_SCHEDULE_MODIFIER_simd:
2687     if (Schedule == OMP_sch_static_chunked)
2688       Schedule = OMP_sch_static_balanced_chunked;
2689     break;
2690   case OMPC_SCHEDULE_MODIFIER_last:
2691   case OMPC_SCHEDULE_MODIFIER_unknown:
2692     break;
2693   }
2694   switch (M2) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2710   // If the static schedule kind is specified or if the ordered clause is
2711   // specified, and if the nonmonotonic modifier is not specified, the effect is
2712   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2713   // modifier is specified, the effect is as if the nonmonotonic modifier is
2714   // specified.
2715   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2716     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2717           Schedule == OMP_sch_static_balanced_chunked ||
2718           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2719           Schedule == OMP_dist_sch_static_chunked ||
2720           Schedule == OMP_dist_sch_static))
2721       Modifier = OMP_sch_modifier_nonmonotonic;
2722   }
2723   return Schedule | Modifier;
2724 }
2725 
2726 void CGOpenMPRuntime::emitForDispatchInit(
2727     CodeGenFunction &CGF, SourceLocation Loc,
2728     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2729     bool Ordered, const DispatchRTInput &DispatchValues) {
2730   if (!CGF.HaveInsertPoint())
2731     return;
2732   OpenMPSchedType Schedule = getRuntimeSchedule(
2733       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2734   assert(Ordered ||
2735          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2736           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2737           Schedule != OMP_sch_static_balanced_chunked));
2738   // Call __kmpc_dispatch_init(
2739   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2740   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2741   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2742 
2743   // If the Chunk was not specified in the clause - use default value 1.
2744   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2745                                             : CGF.Builder.getIntN(IVSize, 1);
2746   llvm::Value *Args[] = {
2747       emitUpdateLocation(CGF, Loc),
2748       getThreadID(CGF, Loc),
2749       CGF.Builder.getInt32(addMonoNonMonoModifier(
2750           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2751       DispatchValues.LB,                                     // Lower
2752       DispatchValues.UB,                                     // Upper
2753       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2754       Chunk                                                  // Chunk
2755   };
2756   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2757 }
2758 
2759 static void emitForStaticInitCall(
2760     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2761     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2762     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2763     const CGOpenMPRuntime::StaticRTInput &Values) {
2764   if (!CGF.HaveInsertPoint())
2765     return;
2766 
2767   assert(!Values.Ordered);
2768   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2769          Schedule == OMP_sch_static_balanced_chunked ||
2770          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2771          Schedule == OMP_dist_sch_static ||
2772          Schedule == OMP_dist_sch_static_chunked);
2773 
2774   // Call __kmpc_for_static_init(
2775   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2776   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2777   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2778   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2779   llvm::Value *Chunk = Values.Chunk;
2780   if (Chunk == nullptr) {
2781     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2782             Schedule == OMP_dist_sch_static) &&
2783            "expected static non-chunked schedule");
2784     // If the Chunk was not specified in the clause - use default value 1.
2785     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2786   } else {
2787     assert((Schedule == OMP_sch_static_chunked ||
2788             Schedule == OMP_sch_static_balanced_chunked ||
2789             Schedule == OMP_ord_static_chunked ||
2790             Schedule == OMP_dist_sch_static_chunked) &&
2791            "expected static chunked schedule");
2792   }
2793   llvm::Value *Args[] = {
2794       UpdateLocation,
2795       ThreadId,
2796       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2797                                                   M2)), // Schedule type
2798       Values.IL.getPointer(),                           // &isLastIter
2799       Values.LB.getPointer(),                           // &LB
2800       Values.UB.getPointer(),                           // &UB
2801       Values.ST.getPointer(),                           // &Stride
2802       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2803       Chunk                                             // Chunk
2804   };
2805   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2806 }
2807 
2808 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2809                                         SourceLocation Loc,
2810                                         OpenMPDirectiveKind DKind,
2811                                         const OpenMPScheduleTy &ScheduleKind,
2812                                         const StaticRTInput &Values) {
2813   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2814       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2815   assert(isOpenMPWorksharingDirective(DKind) &&
2816          "Expected loop-based or sections-based directive.");
2817   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2818                                              isOpenMPLoopDirective(DKind)
2819                                                  ? OMP_IDENT_WORK_LOOP
2820                                                  : OMP_IDENT_WORK_SECTIONS);
2821   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2822   llvm::FunctionCallee StaticInitFunction =
2823       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2824   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2825   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2826                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2827 }
2828 
2829 void CGOpenMPRuntime::emitDistributeStaticInit(
2830     CodeGenFunction &CGF, SourceLocation Loc,
2831     OpenMPDistScheduleClauseKind SchedKind,
2832     const CGOpenMPRuntime::StaticRTInput &Values) {
2833   OpenMPSchedType ScheduleNum =
2834       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2835   llvm::Value *UpdatedLocation =
2836       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2837   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2838   llvm::FunctionCallee StaticInitFunction =
2839       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2840   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2841                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2842                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2843 }
2844 
2845 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2846                                           SourceLocation Loc,
2847                                           OpenMPDirectiveKind DKind) {
2848   if (!CGF.HaveInsertPoint())
2849     return;
2850   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2851   llvm::Value *Args[] = {
2852       emitUpdateLocation(CGF, Loc,
2853                          isOpenMPDistributeDirective(DKind)
2854                              ? OMP_IDENT_WORK_DISTRIBUTE
2855                              : isOpenMPLoopDirective(DKind)
2856                                    ? OMP_IDENT_WORK_LOOP
2857                                    : OMP_IDENT_WORK_SECTIONS),
2858       getThreadID(CGF, Loc)};
2859   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2860   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2861                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2862                       Args);
2863 }
2864 
2865 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2866                                                  SourceLocation Loc,
2867                                                  unsigned IVSize,
2868                                                  bool IVSigned) {
2869   if (!CGF.HaveInsertPoint())
2870     return;
2871   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2872   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2873   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2874 }
2875 
2876 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2877                                           SourceLocation Loc, unsigned IVSize,
2878                                           bool IVSigned, Address IL,
2879                                           Address LB, Address UB,
2880                                           Address ST) {
2881   // Call __kmpc_dispatch_next(
2882   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2883   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2884   //          kmp_int[32|64] *p_stride);
2885   llvm::Value *Args[] = {
2886       emitUpdateLocation(CGF, Loc),
2887       getThreadID(CGF, Loc),
2888       IL.getPointer(), // &isLastIter
2889       LB.getPointer(), // &Lower
2890       UB.getPointer(), // &Upper
2891       ST.getPointer()  // &Stride
2892   };
2893   llvm::Value *Call =
2894       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2895   return CGF.EmitScalarConversion(
2896       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2897       CGF.getContext().BoolTy, Loc);
2898 }
2899 
2900 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2901                                            llvm::Value *NumThreads,
2902                                            SourceLocation Loc) {
2903   if (!CGF.HaveInsertPoint())
2904     return;
2905   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2906   llvm::Value *Args[] = {
2907       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2909   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2911                       Args);
2912 }
2913 
2914 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2915                                          ProcBindKind ProcBind,
2916                                          SourceLocation Loc) {
2917   if (!CGF.HaveInsertPoint())
2918     return;
2919   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2920   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2921   llvm::Value *Args[] = {
2922       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2923       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2924   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2925                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2926                       Args);
2927 }
2928 
2929 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2930                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2931   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2932     OMPBuilder.createFlush(CGF.Builder);
2933   } else {
2934     if (!CGF.HaveInsertPoint())
2935       return;
2936     // Build call void __kmpc_flush(ident_t *loc)
2937     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                             CGM.getModule(), OMPRTL___kmpc_flush),
2939                         emitUpdateLocation(CGF, Loc));
2940   }
2941 }
2942 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively from 0 in declaration order. The entries marked
/// "Taskloops only" describe fields that exist only for taskloop tasks.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// NOTE(review): the generic name presumably mirrors a multi-purpose data
  /// slot in the runtime's task structure - confirm against the runtime.
  Data1,
  /// Task priority.
  /// NOTE(review): generic name, presumably for the same reason as Data1.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2968 
2969 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2970   return OffloadEntriesTargetRegion.empty() &&
2971          OffloadEntriesDeviceGlobalVar.empty();
2972 }
2973 
2974 /// Initialize target region entry.
2975 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2976     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2977                                     StringRef ParentName, unsigned LineNum,
2978                                     unsigned Order) {
2979   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2980                                              "only required for the device "
2981                                              "code generation.");
2982   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2983       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2984                                    OMPTargetRegionEntryTargetRegion);
2985   ++OffloadingEntriesNum;
2986 }
2987 
2988 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2989     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2990                                   StringRef ParentName, unsigned LineNum,
2991                                   llvm::Constant *Addr, llvm::Constant *ID,
2992                                   OMPTargetRegionEntryKind Flags) {
2993   // If we are emitting code for a target, the entry is already initialized,
2994   // only has to be registered.
2995   if (CGM.getLangOpts().OpenMPIsDevice) {
2996     // This could happen if the device compilation is invoked standalone.
2997     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2998       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2999                                       OffloadingEntriesNum);
3000     auto &Entry =
3001         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3002     Entry.setAddress(Addr);
3003     Entry.setID(ID);
3004     Entry.setFlags(Flags);
3005   } else {
3006     if (Flags ==
3007             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3008         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3009                                  /*IgnoreAddressId*/ true))
3010       return;
3011     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3012            "Target region entry already registered!");
3013     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3014     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3015     ++OffloadingEntriesNum;
3016   }
3017 }
3018 
3019 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3020     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3021     bool IgnoreAddressId) const {
3022   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3023   if (PerDevice == OffloadEntriesTargetRegion.end())
3024     return false;
3025   auto PerFile = PerDevice->second.find(FileID);
3026   if (PerFile == PerDevice->second.end())
3027     return false;
3028   auto PerParentName = PerFile->second.find(ParentName);
3029   if (PerParentName == PerFile->second.end())
3030     return false;
3031   auto PerLine = PerParentName->second.find(LineNum);
3032   if (PerLine == PerParentName->second.end())
3033     return false;
3034   // Fail if this entry is already registered.
3035   if (!IgnoreAddressId &&
3036       (PerLine->second.getAddress() || PerLine->second.getID()))
3037     return false;
3038   return true;
3039 }
3040 
3041 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3042     const OffloadTargetRegionEntryInfoActTy &Action) {
3043   // Scan all target region entries and perform the provided action.
3044   for (const auto &D : OffloadEntriesTargetRegion)
3045     for (const auto &F : D.second)
3046       for (const auto &P : F.second)
3047         for (const auto &L : P.second)
3048           Action(D.first, F.first, P.first(), L.first, L.second);
3049 }
3050 
3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3052     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3053                                        OMPTargetGlobalVarEntryKind Flags,
3054                                        unsigned Order) {
3055   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3056                                              "only required for the device "
3057                                              "code generation.");
3058   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3059   ++OffloadingEntriesNum;
3060 }
3061 
3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3063     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3064                                      CharUnits VarSize,
3065                                      OMPTargetGlobalVarEntryKind Flags,
3066                                      llvm::GlobalValue::LinkageTypes Linkage) {
3067   if (CGM.getLangOpts().OpenMPIsDevice) {
3068     // This could happen if the device compilation is invoked standalone.
3069     if (!hasDeviceGlobalVarEntryInfo(VarName))
3070       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3071     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3072     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3073            "Resetting with the new address.");
3074     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3075       if (Entry.getVarSize().isZero()) {
3076         Entry.setVarSize(VarSize);
3077         Entry.setLinkage(Linkage);
3078       }
3079       return;
3080     }
3081     Entry.setVarSize(VarSize);
3082     Entry.setLinkage(Linkage);
3083     Entry.setAddress(Addr);
3084   } else {
3085     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3086       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3087       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3088              "Entry not initialized!");
3089       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3090              "Resetting with the new address.");
3091       if (Entry.getVarSize().isZero()) {
3092         Entry.setVarSize(VarSize);
3093         Entry.setLinkage(Linkage);
3094       }
3095       return;
3096     }
3097     OffloadEntriesDeviceGlobalVar.try_emplace(
3098         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3099     ++OffloadingEntriesNum;
3100   }
3101 }
3102 
3103 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3104     actOnDeviceGlobalVarEntriesInfo(
3105         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3106   // Scan all target region entries and perform the provided action.
3107   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3108     Action(E.getKey(), E.getValue());
3109 }
3110 
3111 void CGOpenMPRuntime::createOffloadEntry(
3112     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3113     llvm::GlobalValue::LinkageTypes Linkage) {
3114   StringRef Name = Addr->getName();
3115   llvm::Module &M = CGM.getModule();
3116   llvm::LLVMContext &C = M.getContext();
3117 
3118   // Create constant string with the name.
3119   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3120 
3121   std::string StringName = getName({"omp_offloading", "entry_name"});
3122   auto *Str = new llvm::GlobalVariable(
3123       M, StrPtrInit->getType(), /*isConstant=*/true,
3124       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3125   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3126 
3127   llvm::Constant *Data[] = {
3128       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3129       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3130       llvm::ConstantInt::get(CGM.SizeTy, Size),
3131       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3132       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3133   std::string EntryName = getName({"omp_offloading", "entry", ""});
3134   llvm::GlobalVariable *Entry = createGlobalStruct(
3135       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3136       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3137 
3138   // The entry has to be created in the section the linker expects it to be.
3139   Entry->setSection("omp_offloading_entries");
3140 }
3141 
/// Emits the "omp_offload.info" named metadata and the offload entries for all
/// recorded target regions and declare target variables.
///
/// The work happens in three passes: target region entries are turned into
/// metadata, device global variable entries are turned into metadata, and then
/// the entries are visited in creation order to emit the actual offload
/// entries (or a diagnostic when an entry is incomplete).
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the creation order of an entry.
  // ParentFunctions is only filled in for target region entries.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by finding the file with
        // the matching device/file ID. Loc stays invalid if none matches.
        // Note: inside this loop the end iterator `E` shadows the entry
        // parameter `E`; after the loop `E` refers to the entry again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // No source location is recorded for variables.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the offload entries in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // A link entry must have no address on the device and must have one on
        // the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          // Unlike the 'to' case, this diagnostic carries neither a source
          // location nor the variable name.
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3315 
3316 /// Loads all the offload entries information from the host IR
3317 /// metadata.
3318 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3319   // If we are in target mode, load the metadata from the host IR. This code has
3320   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3321 
3322   if (!CGM.getLangOpts().OpenMPIsDevice)
3323     return;
3324 
3325   if (CGM.getLangOpts().OMPHostIRFile.empty())
3326     return;
3327 
3328   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3329   if (auto EC = Buf.getError()) {
3330     CGM.getDiags().Report(diag::err_cannot_open_file)
3331         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3332     return;
3333   }
3334 
3335   llvm::LLVMContext C;
3336   auto ME = expectedToErrorOrAndEmitErrors(
3337       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3338 
3339   if (auto EC = ME.getError()) {
3340     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3341         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3342     CGM.getDiags().Report(DiagID)
3343         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3344     return;
3345   }
3346 
3347   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3348   if (!MD)
3349     return;
3350 
3351   for (llvm::MDNode *MN : MD->operands()) {
3352     auto &&GetMDInt = [MN](unsigned Idx) {
3353       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3354       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3355     };
3356 
3357     auto &&GetMDString = [MN](unsigned Idx) {
3358       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3359       return V->getString();
3360     };
3361 
3362     switch (GetMDInt(0)) {
3363     default:
3364       llvm_unreachable("Unexpected metadata!");
3365       break;
3366     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3367         OffloadingEntryInfoTargetRegion:
3368       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3369           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3370           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3371           /*Order=*/GetMDInt(5));
3372       break;
3373     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3374         OffloadingEntryInfoDeviceGlobalVar:
3375       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3376           /*MangledName=*/GetMDString(1),
3377           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3378               /*Flags=*/GetMDInt(2)),
3379           /*Order=*/GetMDInt(3));
3380       break;
3381     }
3382   }
3383 }
3384 
3385 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3386   if (!KmpRoutineEntryPtrTy) {
3387     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3388     ASTContext &C = CGM.getContext();
3389     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3390     FunctionProtoType::ExtProtoInfo EPI;
3391     KmpRoutineEntryPtrQTy = C.getPointerType(
3392         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3393     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3394   }
3395 }
3396 
3397 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3398   // Make sure the type of the entry is already created. This is the type we
3399   // have to create:
3400   // struct __tgt_offload_entry{
3401   //   void      *addr;       // Pointer to the offload entry info.
3402   //                          // (function or global)
3403   //   char      *name;       // Name of the function or global.
3404   //   size_t     size;       // Size of the entry info (0 if it a function).
3405   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3406   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3407   // };
3408   if (TgtOffloadEntryQTy.isNull()) {
3409     ASTContext &C = CGM.getContext();
3410     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3411     RD->startDefinition();
3412     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3413     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3414     addFieldToRecordDecl(C, RD, C.getSizeType());
3415     addFieldToRecordDecl(
3416         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3417     addFieldToRecordDecl(
3418         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3419     RD->completeDefinition();
3420     RD->addAttr(PackedAttr::CreateImplicit(C));
3421     TgtOffloadEntryQTy = C.getRecordType(RD);
3422   }
3423   return TgtOffloadEntryQTy;
3424 }
3425 
3426 namespace {
3427 struct PrivateHelpersTy {
3428   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3429                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3430       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3431         PrivateElemInit(PrivateElemInit) {}
3432   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3433   const Expr *OriginalRef = nullptr;
3434   const VarDecl *Original = nullptr;
3435   const VarDecl *PrivateCopy = nullptr;
3436   const VarDecl *PrivateElemInit = nullptr;
3437   bool isLocalPrivate() const {
3438     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3439   }
3440 };
3441 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3442 } // anonymous namespace
3443 
3444 static bool isAllocatableDecl(const VarDecl *VD) {
3445   const VarDecl *CVD = VD->getCanonicalDecl();
3446   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3447     return false;
3448   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3449   // Use the default allocation.
3450   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3451             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3452            !AA->getAllocator());
3453 }
3454 
3455 static RecordDecl *
3456 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3457   if (!Privates.empty()) {
3458     ASTContext &C = CGM.getContext();
3459     // Build struct .kmp_privates_t. {
3460     //         /*  private vars  */
3461     //       };
3462     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3463     RD->startDefinition();
3464     for (const auto &Pair : Privates) {
3465       const VarDecl *VD = Pair.second.Original;
3466       QualType Type = VD->getType().getNonReferenceType();
3467       // If the private variable is a local variable with lvalue ref type,
3468       // allocate the pointer instead of the pointee type.
3469       if (Pair.second.isLocalPrivate()) {
3470         if (VD->getType()->isLValueReferenceType())
3471           Type = C.getPointerType(Type);
3472         if (isAllocatableDecl(VD))
3473           Type = C.getPointerType(Type);
3474       }
3475       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3476       if (VD->hasAttrs()) {
3477         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3478              E(VD->getAttrs().end());
3479              I != E; ++I)
3480           FD->addAttr(*I);
3481       }
3482     }
3483     RD->completeDefinition();
3484     return RD;
3485   }
3486   return nullptr;
3487 }
3488 
3489 static RecordDecl *
3490 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3491                          QualType KmpInt32Ty,
3492                          QualType KmpRoutineEntryPointerQTy) {
3493   ASTContext &C = CGM.getContext();
3494   // Build struct kmp_task_t {
3495   //         void *              shareds;
3496   //         kmp_routine_entry_t routine;
3497   //         kmp_int32           part_id;
3498   //         kmp_cmplrdata_t data1;
3499   //         kmp_cmplrdata_t data2;
3500   // For taskloops additional fields:
3501   //         kmp_uint64          lb;
3502   //         kmp_uint64          ub;
3503   //         kmp_int64           st;
3504   //         kmp_int32           liter;
3505   //         void *              reductions;
3506   //       };
3507   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3508   UD->startDefinition();
3509   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3510   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3511   UD->completeDefinition();
3512   QualType KmpCmplrdataTy = C.getRecordType(UD);
3513   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3514   RD->startDefinition();
3515   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3516   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3517   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3518   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   if (isOpenMPTaskLoopDirective(Kind)) {
3521     QualType KmpUInt64Ty =
3522         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3523     QualType KmpInt64Ty =
3524         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3525     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3529     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3530   }
3531   RD->completeDefinition();
3532   return RD;
3533 }
3534 
3535 static RecordDecl *
3536 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3537                                      ArrayRef<PrivateDataTy> Privates) {
3538   ASTContext &C = CGM.getContext();
3539   // Build struct kmp_task_t_with_privates {
3540   //         kmp_task_t task_data;
3541   //         .kmp_privates_t. privates;
3542   //       };
3543   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3544   RD->startDefinition();
3545   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3546   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3547     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3548   RD->completeDefinition();
3549   return RD;
3550 }
3551 
3552 /// Emit a proxy function which accepts kmp_task_t as the second
3553 /// argument.
3554 /// \code
3555 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3556 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3557 ///   For taskloops:
3558 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3559 ///   tt->reductions, tt->shareds);
3560 ///   return 0;
3561 /// }
3562 /// \endcode
3563 static llvm::Function *
3564 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3565                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3566                       QualType KmpTaskTWithPrivatesPtrQTy,
3567                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3568                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3569                       llvm::Value *TaskPrivatesMap) {
3570   ASTContext &C = CGM.getContext();
3571   FunctionArgList Args;
3572   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3573                             ImplicitParamDecl::Other);
3574   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3575                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3576                                 ImplicitParamDecl::Other);
3577   Args.push_back(&GtidArg);
3578   Args.push_back(&TaskTypeArg);
3579   const auto &TaskEntryFnInfo =
3580       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3581   llvm::FunctionType *TaskEntryTy =
3582       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3583   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3584   auto *TaskEntry = llvm::Function::Create(
3585       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3586   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3587   TaskEntry->setDoesNotRecurse();
3588   CodeGenFunction CGF(CGM);
3589   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3590                     Loc, Loc);
3591 
3592   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3593   // tt,
3594   // For taskloops:
3595   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3596   // tt->task_data.shareds);
3597   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3598       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3599   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3600       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3601       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3602   const auto *KmpTaskTWithPrivatesQTyRD =
3603       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3604   LValue Base =
3605       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3606   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3607   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3608   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3609   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3610 
3611   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3612   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3613   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3614       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3615       CGF.ConvertTypeForMem(SharedsPtrTy));
3616 
3617   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3618   llvm::Value *PrivatesParam;
3619   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3620     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3621     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3622         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3623   } else {
3624     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3625   }
3626 
3627   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3628                                TaskPrivatesMap,
3629                                CGF.Builder
3630                                    .CreatePointerBitCastOrAddrSpaceCast(
3631                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3632                                    .getPointer()};
3633   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3634                                           std::end(CommonArgs));
3635   if (isOpenMPTaskLoopDirective(Kind)) {
3636     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3637     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3638     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3639     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3640     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3641     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3642     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3643     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3644     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3645     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3646     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3647     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3648     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3649     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3650     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3651     CallArgs.push_back(LBParam);
3652     CallArgs.push_back(UBParam);
3653     CallArgs.push_back(StParam);
3654     CallArgs.push_back(LIParam);
3655     CallArgs.push_back(RParam);
3656   }
3657   CallArgs.push_back(SharedsParam);
3658 
3659   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3660                                                   CallArgs);
3661   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3662                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3663   CGF.FinishFunction();
3664   return TaskEntry;
3665 }
3666 
3667 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3668                                             SourceLocation Loc,
3669                                             QualType KmpInt32Ty,
3670                                             QualType KmpTaskTWithPrivatesPtrQTy,
3671                                             QualType KmpTaskTWithPrivatesQTy) {
3672   ASTContext &C = CGM.getContext();
3673   FunctionArgList Args;
3674   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3675                             ImplicitParamDecl::Other);
3676   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3677                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3678                                 ImplicitParamDecl::Other);
3679   Args.push_back(&GtidArg);
3680   Args.push_back(&TaskTypeArg);
3681   const auto &DestructorFnInfo =
3682       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3683   llvm::FunctionType *DestructorFnTy =
3684       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3685   std::string Name =
3686       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3687   auto *DestructorFn =
3688       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3689                              Name, &CGM.getModule());
3690   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3691                                     DestructorFnInfo);
3692   DestructorFn->setDoesNotRecurse();
3693   CodeGenFunction CGF(CGM);
3694   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3695                     Args, Loc, Loc);
3696 
3697   LValue Base = CGF.EmitLoadOfPointerLValue(
3698       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3699       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3700   const auto *KmpTaskTWithPrivatesQTyRD =
3701       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3702   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3703   Base = CGF.EmitLValueForField(Base, *FI);
3704   for (const auto *Field :
3705        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3706     if (QualType::DestructionKind DtorKind =
3707             Field->getType().isDestructedType()) {
3708       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3709       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3710     }
3711   }
3712   CGF.FinishFunction();
3713   return DestructorFn;
3714 }
3715 
3716 /// Emit a privates mapping function for correct handling of private and
3717 /// firstprivate variables.
3718 /// \code
3719 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3720 /// **noalias priv1,...,  <tyn> **noalias privn) {
3721 ///   *priv1 = &.privates.priv1;
3722 ///   ...;
3723 ///   *privn = &.privates.privn;
3724 /// }
3725 /// \endcode
3726 static llvm::Value *
3727 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3728                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3729                                ArrayRef<PrivateDataTy> Privates) {
3730   ASTContext &C = CGM.getContext();
3731   FunctionArgList Args;
3732   ImplicitParamDecl TaskPrivatesArg(
3733       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3734       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3735       ImplicitParamDecl::Other);
3736   Args.push_back(&TaskPrivatesArg);
3737   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3738   unsigned Counter = 1;
3739   for (const Expr *E : Data.PrivateVars) {
3740     Args.push_back(ImplicitParamDecl::Create(
3741         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3742         C.getPointerType(C.getPointerType(E->getType()))
3743             .withConst()
3744             .withRestrict(),
3745         ImplicitParamDecl::Other));
3746     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3747     PrivateVarsPos[VD] = Counter;
3748     ++Counter;
3749   }
3750   for (const Expr *E : Data.FirstprivateVars) {
3751     Args.push_back(ImplicitParamDecl::Create(
3752         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3753         C.getPointerType(C.getPointerType(E->getType()))
3754             .withConst()
3755             .withRestrict(),
3756         ImplicitParamDecl::Other));
3757     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3758     PrivateVarsPos[VD] = Counter;
3759     ++Counter;
3760   }
3761   for (const Expr *E : Data.LastprivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const VarDecl *VD : Data.PrivateLocals) {
3773     QualType Ty = VD->getType().getNonReferenceType();
3774     if (VD->getType()->isLValueReferenceType())
3775       Ty = C.getPointerType(Ty);
3776     if (isAllocatableDecl(VD))
3777       Ty = C.getPointerType(Ty);
3778     Args.push_back(ImplicitParamDecl::Create(
3779         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3780         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3781         ImplicitParamDecl::Other));
3782     PrivateVarsPos[VD] = Counter;
3783     ++Counter;
3784   }
3785   const auto &TaskPrivatesMapFnInfo =
3786       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3787   llvm::FunctionType *TaskPrivatesMapTy =
3788       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3789   std::string Name =
3790       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3791   auto *TaskPrivatesMap = llvm::Function::Create(
3792       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3793       &CGM.getModule());
3794   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3795                                     TaskPrivatesMapFnInfo);
3796   if (CGM.getLangOpts().Optimize) {
3797     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3799     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3800   }
3801   CodeGenFunction CGF(CGM);
3802   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3803                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3804 
3805   // *privi = &.privates.privi;
3806   LValue Base = CGF.EmitLoadOfPointerLValue(
3807       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3808       TaskPrivatesArg.getType()->castAs<PointerType>());
3809   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3810   Counter = 0;
3811   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3812     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3813     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3814     LValue RefLVal =
3815         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3816     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3817         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3818     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3819     ++Counter;
3820   }
3821   CGF.FinishFunction();
3822   return TaskPrivatesMap;
3823 }
3824 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, reached through \p TDBase)
/// and emits the initializer of each private copy into its field. Local
/// privates are skipped.
///
/// \param CGF Function the initialization code is emitted into.
/// \param D Directive whose 'task' or 'taskloop' captured statement is used to
/// look up the capture fields of the original variables.
/// \param KmpTaskSharedsPtr Address of the shareds block. It is only read for
/// non-target tasks with firstprivates when \p ForDup is set, and for target
/// tasks when it is valid.
/// \param TDBase LValue of the kmp_task_t_with_privates object to initialize.
/// \param KmpTaskTWithPrivatesQTyRD Record of that object; it must have the
/// privates field as its second field.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only FirstprivateVars is consulted here.
/// \param Privates Descriptions of the privatized variables, in field order.
/// \param ForDup True when emitting into the task duplication function. In
/// that mode only copies initialized by a non-trivial constructor call are
/// (re)initialized, and firstprivate sources are read from the shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates field of the task record ...
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base of the shareds block; stays default-constructed unless the condition
  // below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // ... and from here on iterates over the fields of the privates record
  // itself, advancing once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside of the dup function every initializer is emitted; inside it
    // only non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads from a source
      // object, whose lvalue is determined first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Not captured: take the address of the local variable directly.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source from the shareds block and rebuild its lvalue with
          // the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // The variable is a lambda capture, or we are inside a block: emit
          // the reference to the original as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: bind the helper variable to the source object and emit
          // the initializer of the private copy into its field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not depend on a source object.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3946 
3947 /// Check if duplication function is required for taskloops.
3948 static bool checkInitIsRequired(CodeGenFunction &CGF,
3949                                 ArrayRef<PrivateDataTy> Privates) {
3950   bool InitRequired = false;
3951   for (const PrivateDataTy &Pair : Privates) {
3952     if (Pair.second.isLocalPrivate())
3953       continue;
3954     const VarDecl *VD = Pair.second.PrivateCopy;
3955     const Expr *Init = VD->getAnyInitializer();
3956     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3957                                     !CGF.isTrivialInitializer(Init));
3958     if (InitRequired)
3959       break;
3960   }
3961   return InitRequired;
3962 }
3963 
3964 
3965 /// Emit task_dup function (for initialization of
3966 /// private/firstprivate/lastprivate vars and last_iter flag)
3967 /// \code
3968 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3969 /// lastpriv) {
3970 /// // setup lastprivate flag
3971 ///    task_dst->last = lastpriv;
3972 /// // could be constructor calls here...
3973 /// }
3974 /// \endcode
3975 static llvm::Value *
3976 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3977                     const OMPExecutableDirective &D,
3978                     QualType KmpTaskTWithPrivatesPtrQTy,
3979                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3980                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3981                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3982                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3983   ASTContext &C = CGM.getContext();
3984   FunctionArgList Args;
3985   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3986                            KmpTaskTWithPrivatesPtrQTy,
3987                            ImplicitParamDecl::Other);
3988   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3989                            KmpTaskTWithPrivatesPtrQTy,
3990                            ImplicitParamDecl::Other);
3991   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3992                                 ImplicitParamDecl::Other);
3993   Args.push_back(&DstArg);
3994   Args.push_back(&SrcArg);
3995   Args.push_back(&LastprivArg);
3996   const auto &TaskDupFnInfo =
3997       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3998   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3999   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4000   auto *TaskDup = llvm::Function::Create(
4001       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4002   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4003   TaskDup->setDoesNotRecurse();
4004   CodeGenFunction CGF(CGM);
4005   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4006                     Loc);
4007 
4008   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4009       CGF.GetAddrOfLocalVar(&DstArg),
4010       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4011   // task_dst->liter = lastpriv;
4012   if (WithLastIter) {
4013     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4014     LValue Base = CGF.EmitLValueForField(
4015         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4016     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4017     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4018         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4019     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4020   }
4021 
4022   // Emit initial values for private copies (if any).
4023   assert(!Privates.empty());
4024   Address KmpTaskSharedsPtr = Address::invalid();
4025   if (!Data.FirstprivateVars.empty()) {
4026     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4027         CGF.GetAddrOfLocalVar(&SrcArg),
4028         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4029     LValue Base = CGF.EmitLValueForField(
4030         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4031     KmpTaskSharedsPtr = Address(
4032         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4033                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4034                                                   KmpTaskTShareds)),
4035                              Loc),
4036         CGM.getNaturalTypeAlignment(SharedsTy));
4037   }
4038   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4039                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4040   CGF.FinishFunction();
4041   return TaskDup;
4042 }
4043 
4044 /// Checks if destructor function is required to be generated.
4045 /// \return true if cleanups are required, false otherwise.
4046 static bool
4047 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4048                          ArrayRef<PrivateDataTy> Privates) {
4049   for (const PrivateDataTy &P : Privates) {
4050     if (P.second.isLocalPrivate())
4051       continue;
4052     QualType Ty = P.second.Original->getType().getNonReferenceType();
4053     if (Ty.isDestructedType())
4054       return true;
4055   }
4056   return false;
4057 }
4058 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits the opening part of one loop per iterator of the
/// expression (counter initialization, the "iter.cont" condition block and the
/// "iter.body" block); the destructor emits the matching closing part (counter
/// update, branch back to "iter.cont" and the "iter.exit" block). Code emitted
/// while an object of this class is alive therefore lands inside the loop
/// body. If the iterator expression is null, nothing is emitted.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the "iter.cont" blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the "iter.exit" blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \p E (outermost first).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // The upper bounds are emitted up front, before the iterator and counter
    // variables are privatized and before any loop block is created.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Register a temporary for the iterator variable itself...
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // ...and one for the helper counter variable.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember the destinations so that the destructor can close the loop.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the loop tails for all iterators, walking them in reverse order
  /// (the last iterator's loop is closed first).
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // IsFinished is passed as true only for the first iterator's exit
      // block, which is the last one emitted.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4137 
4138 static std::pair<llvm::Value *, llvm::Value *>
4139 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4140   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4141   llvm::Value *Addr;
4142   if (OASE) {
4143     const Expr *Base = OASE->getBase();
4144     Addr = CGF.EmitScalarExpr(Base);
4145   } else {
4146     Addr = CGF.EmitLValue(E).getPointer(CGF);
4147   }
4148   llvm::Value *SizeVal;
4149   QualType Ty = E->getType();
4150   if (OASE) {
4151     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4152     for (const Expr *SE : OASE->getDimensions()) {
4153       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4154       Sz = CGF.EmitScalarConversion(
4155           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4156       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4157     }
4158   } else if (const auto *ASE =
4159                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4160     LValue UpAddrLVal =
4161         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4162     llvm::Value *UpAddr =
4163         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4164     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4165     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4166     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4167   } else {
4168     SizeVal = CGF.getTypeSize(Ty);
4169   }
4170   return std::make_pair(Addr, SizeVal);
4171 }
4172 
4173 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4174 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4175   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4176   if (KmpTaskAffinityInfoTy.isNull()) {
4177     RecordDecl *KmpAffinityInfoRD =
4178         C.buildImplicitRecord("kmp_task_affinity_info_t");
4179     KmpAffinityInfoRD->startDefinition();
4180     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4181     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4182     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4183     KmpAffinityInfoRD->completeDefinition();
4184     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4185   }
4186 }
4187 
/// Emits the code that allocates and initializes a task descriptor for the
/// directive \p D.
///
/// In order, this function:
///  1. collects private, firstprivate, lastprivate and local private
///     variables from \p Data and sorts them by decreasing alignment;
///  2. builds (or reuses) the kmp_task_t record type and the per-task record
///     that additionally holds the privates;
///  3. emits the privates mapping function and the proxy task entry;
///  4. emits a call to __kmpc_omp_task_alloc, or to
///     __kmpc_omp_target_task_alloc if the directive has a 'nowait' clause;
///  5. handles 'detach' and 'affinity' clauses;
///  6. copies the shareds, initializes the privates and, for taskloop
///     directives, emits the task duplication function when needed;
///  7. stores the destructors function and the priority in the descriptor.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the emitted runtime calls.
/// \param D Directive for which the task is created.
/// \param TaskFunction Outlined task function; the type of its fourth
/// argument is used as the type of the privates mapping function pointer.
/// \param SharedsTy Type of the record holding the shared variables.
/// \param Shareds Address of the record holding the shared variables.
/// \param Data Additional task data (privates, priority, final, tied, ...).
/// \return Structure describing the new task: the allocated task, the proxy
/// entry, the task cast to the per-task record pointer, the lvalue of the
/// embedded kmp_task_t, the kmp_task_t record declaration and (optionally) the
/// task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Vars list is walked in lock-step with its *Copies list.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable local privates are recorded with pointer alignment instead of
  // the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached
  // record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The expected type of the mapping function is taken from the fourth
  // argument (index 3) of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record is the one holding the
    // privates.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select the final flag at run time;
  // otherwise use its integer part as a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target variant of the allocation routine is
  // called; it takes the device ID as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this local 'Loc' shadows the function parameter of the same name
    // and is based on the location of the detach clause.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler variable
    // before storing it.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier
    // (known at compile time); NumOfElements is the runtime part contributed
    // by clauses with an iterator modifier and stays null if there are none.
    // NOTE(review): the runtime part is the product of the upper bounds of
    // all iterators of all such clauses and does not involve
    // C->varlist_size() - confirm that this matches the number of entries
    // written by the iterator loops below.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = static count + runtime count; the array
      // is emitted as a variable-length array local.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The runtime call below takes the element count as a 32-bit integer.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Only the base_addr and len fields are stored for each entry here (and
    // in the iterator loop below); the flags field is not written.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // For entries produced inside iterator loops the position is only known
    // at run time, so it is kept in a temporary initialized with the number
    // of entries already written.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object opens the iterator loops here and closes them when
      // it is destroyed at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as a pointer to the per-task record; its first
  // field is the embedded kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer from the task descriptor and copy the
    // caller-provided shareds record into the memory it points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives,
    // and only if there are lastprivates or checkInitIsRequired() reports
    // that the privates need initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // Data1 and Data2 are field indices of kmp_task_t defined outside of this
  // function.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4577 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  /// Used for 'in' dependencies.
  DepIn = 0x1,
  /// Used for both 'out' and 'inout' dependencies.
  DepInOut = 0x3,
  /// Used for 'mutexinoutset' dependencies.
  DepMutexInOutSet = 0x4,
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4588 
4589 /// Translates internal dependency kind into the runtime kind.
4590 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4591   RTLDependenceKindTy DepKind;
4592   switch (K) {
4593   case OMPC_DEPEND_in:
4594     DepKind = DepIn;
4595     break;
4596   // Out and InOut dependencies must use the same code.
4597   case OMPC_DEPEND_out:
4598   case OMPC_DEPEND_inout:
4599     DepKind = DepInOut;
4600     break;
4601   case OMPC_DEPEND_mutexinoutset:
4602     DepKind = DepMutexInOutSet;
4603     break;
4604   case OMPC_DEPEND_source:
4605   case OMPC_DEPEND_sink:
4606   case OMPC_DEPEND_depobj:
4607   case OMPC_DEPEND_unknown:
4608     llvm_unreachable("Unknown task dependence type");
4609   }
4610   return DepKind;
4611 }
4612 
4613 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4614 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4615                            QualType &FlagsTy) {
4616   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4617   if (KmpDependInfoTy.isNull()) {
4618     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4619     KmpDependInfoRD->startDefinition();
4620     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4621     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4622     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4623     KmpDependInfoRD->completeDefinition();
4624     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4625   }
4626 }
4627 
4628 std::pair<llvm::Value *, LValue>
4629 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4630                                    SourceLocation Loc) {
4631   ASTContext &C = CGM.getContext();
4632   QualType FlagsTy;
4633   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4634   RecordDecl *KmpDependInfoRD =
4635       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4636   LValue Base = CGF.EmitLoadOfPointerLValue(
4637       DepobjLVal.getAddress(CGF),
4638       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4639   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4640   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4641           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4642   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4643                             Base.getTBAAInfo());
4644   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4645       Addr.getPointer(),
4646       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4647   LValue NumDepsBase = CGF.MakeAddrLValue(
4648       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4649       Base.getBaseInfo(), Base.getTBAAInfo());
4650   // NumDeps = deps[i].base_addr;
4651   LValue BaseAddrLVal = CGF.EmitLValueForField(
4652       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4653   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4654   return std::make_pair(NumDeps, Base);
4655 }
4656 
/// Emits the code that fills entries of \p DependenciesArray with the data of
/// the dependencies listed in \p Data.
///
/// For every expression in Data.DepExprs one kmp_depend_info entry is written:
/// base_addr (address as an integer), len (size) and flags (runtime dependence
/// kind). If \p Data has an iterator expression, the stores are emitted inside
/// the loops generated for that iterator.
///
/// \param CGF Function in which the code is emitted.
/// \param KmpDependInfoTy Cached kmp_depend_info record type; built if null.
/// \param Pos Position of the next entry to fill. It is either a pointer to a
/// compile-time index, which is incremented directly, or a pointer to an
/// lvalue holding a runtime index, which is loaded, incremented and stored
/// back by emitted code.
/// \param Data Kind, expressions and optional iterator of the dependencies.
/// \param DependenciesArray Address of the array of kmp_depend_info entries.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Opens the iterator loops (if there is an iterator expression); they are
  // closed when the scope object is destroyed at the end of this function.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Locate the entry to fill, using either the constant or the runtime
    // position.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: directly for the compile-time index, through a
    // load/add/store sequence for the runtime index.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4715 
4716 static SmallVector<llvm::Value *, 4>
4717 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4718                         const OMPTaskDataTy::DependData &Data) {
4719   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4720          "Expected depobj dependecy kind.");
4721   SmallVector<llvm::Value *, 4> Sizes;
4722   SmallVector<LValue, 4> SizeLVals;
4723   ASTContext &C = CGF.getContext();
4724   QualType FlagsTy;
4725   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4726   RecordDecl *KmpDependInfoRD =
4727       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4728   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4729   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4730   {
4731     OMPIteratorGeneratorScope IteratorScope(
4732         CGF, cast_or_null<OMPIteratorExpr>(
4733                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4734                                    : nullptr));
4735     for (const Expr *E : Data.DepExprs) {
4736       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4737       LValue Base = CGF.EmitLoadOfPointerLValue(
4738           DepobjLVal.getAddress(CGF),
4739           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4740       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4741           Base.getAddress(CGF), KmpDependInfoPtrT);
4742       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4743                                 Base.getTBAAInfo());
4744       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4745           Addr.getPointer(),
4746           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4747       LValue NumDepsBase = CGF.MakeAddrLValue(
4748           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4749           Base.getBaseInfo(), Base.getTBAAInfo());
4750       // NumDeps = deps[i].base_addr;
4751       LValue BaseAddrLVal = CGF.EmitLValueForField(
4752           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4753       llvm::Value *NumDeps =
4754           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4755       LValue NumLVal = CGF.MakeAddrLValue(
4756           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4757           C.getUIntPtrType());
4758       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4759                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4760       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4761       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4762       CGF.EmitStoreOfScalar(Add, NumLVal);
4763       SizeLVals.push_back(NumLVal);
4764     }
4765   }
4766   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4767     llvm::Value *Size =
4768         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4769     Sizes.push_back(Size);
4770   }
4771   return Sizes;
4772 }
4773 
4774 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4775                                LValue PosLVal,
4776                                const OMPTaskDataTy::DependData &Data,
4777                                Address DependenciesArray) {
4778   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4779          "Expected depobj dependecy kind.");
4780   ASTContext &C = CGF.getContext();
4781   QualType FlagsTy;
4782   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4783   RecordDecl *KmpDependInfoRD =
4784       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4785   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4786   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4787   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4788   {
4789     OMPIteratorGeneratorScope IteratorScope(
4790         CGF, cast_or_null<OMPIteratorExpr>(
4791                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4792                                    : nullptr));
4793     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4794       const Expr *E = Data.DepExprs[I];
4795       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4796       LValue Base = CGF.EmitLoadOfPointerLValue(
4797           DepobjLVal.getAddress(CGF),
4798           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4799       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4800           Base.getAddress(CGF), KmpDependInfoPtrT);
4801       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4802                                 Base.getTBAAInfo());
4803 
4804       // Get number of elements in a single depobj.
4805       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4806           Addr.getPointer(),
4807           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4808       LValue NumDepsBase = CGF.MakeAddrLValue(
4809           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4810           Base.getBaseInfo(), Base.getTBAAInfo());
4811       // NumDeps = deps[i].base_addr;
4812       LValue BaseAddrLVal = CGF.EmitLValueForField(
4813           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4814       llvm::Value *NumDeps =
4815           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4816 
4817       // memcopy dependency data.
4818       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4819           ElSize,
4820           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4821       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4822       Address DepAddr =
4823           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4824                   DependenciesArray.getAlignment());
4825       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4826 
4827       // Increase pos.
4828       // pos += size;
4829       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4830       CGF.EmitStoreOfScalar(Add, PosLVal);
4831     }
4832   }
4833 }
4834 
4835 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4836     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4837     SourceLocation Loc) {
4838   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4839         return D.DepExprs.empty();
4840       }))
4841     return std::make_pair(nullptr, Address::invalid());
4842   // Process list of dependencies.
4843   ASTContext &C = CGM.getContext();
4844   Address DependenciesArray = Address::invalid();
4845   llvm::Value *NumOfElements = nullptr;
4846   unsigned NumDependencies = std::accumulate(
4847       Dependencies.begin(), Dependencies.end(), 0,
4848       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4849         return D.DepKind == OMPC_DEPEND_depobj
4850                    ? V
4851                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4852       });
4853   QualType FlagsTy;
4854   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4855   bool HasDepobjDeps = false;
4856   bool HasRegularWithIterators = false;
4857   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4858   llvm::Value *NumOfRegularWithIterators =
4859       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4860   // Calculate number of depobj dependecies and regular deps with the iterators.
4861   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4862     if (D.DepKind == OMPC_DEPEND_depobj) {
4863       SmallVector<llvm::Value *, 4> Sizes =
4864           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4865       for (llvm::Value *Size : Sizes) {
4866         NumOfDepobjElements =
4867             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4868       }
4869       HasDepobjDeps = true;
4870       continue;
4871     }
4872     // Include number of iterations, if any.
4873     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4874       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4875         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4876         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4877         NumOfRegularWithIterators =
4878             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4879       }
4880       HasRegularWithIterators = true;
4881       continue;
4882     }
4883   }
4884 
4885   QualType KmpDependInfoArrayTy;
4886   if (HasDepobjDeps || HasRegularWithIterators) {
4887     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4888                                            /*isSigned=*/false);
4889     if (HasDepobjDeps) {
4890       NumOfElements =
4891           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4892     }
4893     if (HasRegularWithIterators) {
4894       NumOfElements =
4895           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4896     }
4897     OpaqueValueExpr OVE(Loc,
4898                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4899                         VK_RValue);
4900     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4901                                                   RValue::get(NumOfElements));
4902     KmpDependInfoArrayTy =
4903         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4904                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4905     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4906     // Properly emit variable-sized array.
4907     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4908                                          ImplicitParamDecl::Other);
4909     CGF.EmitVarDecl(*PD);
4910     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4911     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4912                                               /*isSigned=*/false);
4913   } else {
4914     KmpDependInfoArrayTy = C.getConstantArrayType(
4915         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4916         ArrayType::Normal, /*IndexTypeQuals=*/0);
4917     DependenciesArray =
4918         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4919     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4920     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4921                                            /*isSigned=*/false);
4922   }
4923   unsigned Pos = 0;
4924   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4925     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4926         Dependencies[I].IteratorExpr)
4927       continue;
4928     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4929                    DependenciesArray);
4930   }
4931   // Copy regular dependecies with iterators.
4932   LValue PosLVal = CGF.MakeAddrLValue(
4933       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4934   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4935   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4936     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4937         !Dependencies[I].IteratorExpr)
4938       continue;
4939     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4940                    DependenciesArray);
4941   }
4942   // Copy final depobj arrays without iterators.
4943   if (HasDepobjDeps) {
4944     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4945       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4946         continue;
4947       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4948                          DependenciesArray);
4949     }
4950   }
4951   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4952       DependenciesArray, CGF.VoidPtrTy);
4953   return std::make_pair(NumOfElements, DependenciesArray);
4954 }
4955 
4956 Address CGOpenMPRuntime::emitDepobjDependClause(
4957     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4958     SourceLocation Loc) {
4959   if (Dependencies.DepExprs.empty())
4960     return Address::invalid();
4961   // Process list of dependencies.
4962   ASTContext &C = CGM.getContext();
4963   Address DependenciesArray = Address::invalid();
4964   unsigned NumDependencies = Dependencies.DepExprs.size();
4965   QualType FlagsTy;
4966   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4967   RecordDecl *KmpDependInfoRD =
4968       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4969 
4970   llvm::Value *Size;
4971   // Define type kmp_depend_info[<Dependencies.size()>];
4972   // For depobj reserve one extra element to store the number of elements.
4973   // It is required to handle depobj(x) update(in) construct.
4974   // kmp_depend_info[<Dependencies.size()>] deps;
4975   llvm::Value *NumDepsVal;
4976   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4977   if (const auto *IE =
4978           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4979     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4980     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4981       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4982       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4983       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4984     }
4985     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4986                                     NumDepsVal);
4987     CharUnits SizeInBytes =
4988         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4989     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4990     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4991     NumDepsVal =
4992         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4993   } else {
4994     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4995         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4996         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4997     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4998     Size = CGM.getSize(Sz.alignTo(Align));
4999     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5000   }
5001   // Need to allocate on the dynamic memory.
5002   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5003   // Use default allocator.
5004   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5005   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5006 
5007   llvm::Value *Addr =
5008       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5009                               CGM.getModule(), OMPRTL___kmpc_alloc),
5010                           Args, ".dep.arr.addr");
5011   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5012       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5013   DependenciesArray = Address(Addr, Align);
5014   // Write number of elements in the first element of array for depobj.
5015   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5016   // deps[i].base_addr = NumDependencies;
5017   LValue BaseAddrLVal = CGF.EmitLValueForField(
5018       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5019   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5020   llvm::PointerUnion<unsigned *, LValue *> Pos;
5021   unsigned Idx = 1;
5022   LValue PosLVal;
5023   if (Dependencies.IteratorExpr) {
5024     PosLVal = CGF.MakeAddrLValue(
5025         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5026         C.getSizeType());
5027     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5028                           /*IsInit=*/true);
5029     Pos = &PosLVal;
5030   } else {
5031     Pos = &Idx;
5032   }
5033   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5034   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5035       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5036   return DependenciesArray;
5037 }
5038 
5039 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5040                                         SourceLocation Loc) {
5041   ASTContext &C = CGM.getContext();
5042   QualType FlagsTy;
5043   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5044   LValue Base = CGF.EmitLoadOfPointerLValue(
5045       DepobjLVal.getAddress(CGF),
5046       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5047   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5048   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5049       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5050   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5051       Addr.getPointer(),
5052       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5053   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5054                                                                CGF.VoidPtrTy);
5055   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5056   // Use default allocator.
5057   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5058   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5059 
5060   // _kmpc_free(gtid, addr, nullptr);
5061   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5062                                 CGM.getModule(), OMPRTL___kmpc_free),
5063                             Args);
5064 }
5065 
5066 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5067                                        OpenMPDependClauseKind NewDepKind,
5068                                        SourceLocation Loc) {
5069   ASTContext &C = CGM.getContext();
5070   QualType FlagsTy;
5071   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5072   RecordDecl *KmpDependInfoRD =
5073       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5074   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5075   llvm::Value *NumDeps;
5076   LValue Base;
5077   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5078 
5079   Address Begin = Base.getAddress(CGF);
5080   // Cast from pointer to array type to pointer to single element.
5081   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5082   // The basic structure here is a while-do loop.
5083   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5084   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5085   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5086   CGF.EmitBlock(BodyBB);
5087   llvm::PHINode *ElementPHI =
5088       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5089   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5090   Begin = Address(ElementPHI, Begin.getAlignment());
5091   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5092                             Base.getTBAAInfo());
5093   // deps[i].flags = NewDepKind;
5094   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5095   LValue FlagsLVal = CGF.EmitLValueForField(
5096       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5097   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5098                         FlagsLVal);
5099 
5100   // Shift the address forward by one element.
5101   Address ElementNext =
5102       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5103   ElementPHI->addIncoming(ElementNext.getPointer(),
5104                           CGF.Builder.GetInsertBlock());
5105   llvm::Value *IsEmpty =
5106       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5107   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5108   // Done.
5109   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5110 }
5111 
// Emits the runtime calls that launch a task created for directive D.
//
// The task is first set up through emitTaskInit() and its dependence array is
// built by emitDependClause(). Two code paths are then prepared:
//  - ThenCodeGen hands the task to the runtime via __kmpc_omp_task_with_deps
//    (when Data.Dependences is not empty) or __kmpc_omp_task.
//  - ElseCodeGen calls __kmpc_omp_wait_deps (when Data.Dependences is not
//    empty) and then invokes the task entry directly, with
//    __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0 supplied as the
//    actions of the region.
// If IfCond is given, both paths are passed to emitIfClause(); otherwise only
// ThenCodeGen is emitted. Nothing is emitted when CGF has no insert point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Create the task object and unpack the pieces needed below.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when there are dependences; the lambda below reads it only in
  // that case. The noalias list is always passed as empty (0, nullptr).
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: enqueue the task in the runtime. TaskArgs and DepTaskArgs are
  // captured by reference, so they must stay alive until the lambda has run.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field before the task is
    // submitted.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments of __kmpc_omp_wait_deps; same layout as DepTaskArgs but without
  // the task pointer. Filled only when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "Else" path: wait for the dependences and call the task entry directly.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an if clause both paths are emitted and selected by emitIfClause();
  // without one only the "then" path is emitted.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5229 
5230 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5231                                        const OMPLoopDirective &D,
5232                                        llvm::Function *TaskFunction,
5233                                        QualType SharedsTy, Address Shareds,
5234                                        const Expr *IfCond,
5235                                        const OMPTaskDataTy &Data) {
5236   if (!CGF.HaveInsertPoint())
5237     return;
5238   TaskResultTy Result =
5239       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5240   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5241   // libcall.
5242   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5243   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5244   // sched, kmp_uint64 grainsize, void *task_dup);
5245   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5246   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5247   llvm::Value *IfVal;
5248   if (IfCond) {
5249     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5250                                       /*isSigned=*/true);
5251   } else {
5252     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5253   }
5254 
5255   LValue LBLVal = CGF.EmitLValueForField(
5256       Result.TDBase,
5257       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5258   const auto *LBVar =
5259       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5260   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5261                        LBLVal.getQuals(),
5262                        /*IsInitializer=*/true);
5263   LValue UBLVal = CGF.EmitLValueForField(
5264       Result.TDBase,
5265       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5266   const auto *UBVar =
5267       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5268   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5269                        UBLVal.getQuals(),
5270                        /*IsInitializer=*/true);
5271   LValue StLVal = CGF.EmitLValueForField(
5272       Result.TDBase,
5273       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5274   const auto *StVar =
5275       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5276   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5277                        StLVal.getQuals(),
5278                        /*IsInitializer=*/true);
5279   // Store reductions address.
5280   LValue RedLVal = CGF.EmitLValueForField(
5281       Result.TDBase,
5282       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5283   if (Data.Reductions) {
5284     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5285   } else {
5286     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5287                                CGF.getContext().VoidPtrTy);
5288   }
5289   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5290   llvm::Value *TaskArgs[] = {
5291       UpLoc,
5292       ThreadID,
5293       Result.NewTask,
5294       IfVal,
5295       LBLVal.getPointer(CGF),
5296       UBLVal.getPointer(CGF),
5297       CGF.EmitLoadOfScalar(StLVal, Loc),
5298       llvm::ConstantInt::getSigned(
5299           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5300       llvm::ConstantInt::getSigned(
5301           CGF.IntTy, Data.Schedule.getPointer()
5302                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5303                          : NoSchedule),
5304       Data.Schedule.getPointer()
5305           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5306                                       /*isSigned=*/false)
5307           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5308       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5309                              Result.TaskDupFn, CGF.VoidPtrTy)
5310                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5311   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5312                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5313                       TaskArgs);
5314 }
5315 
5316 /// Emit reduction operation for each element of array (required for
5317 /// array sections) LHS op = RHS.
5318 /// \param Type Type of array.
5319 /// \param LHSVar Variable on the left side of the reduction operation
5320 /// (references element of array in original variable).
5321 /// \param RHSVar Variable on the right side of the reduction operation
5322 /// (references element of array in original variable).
5323 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5324 /// RHSVar.
5325 static void EmitOMPAggregateReduction(
5326     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5327     const VarDecl *RHSVar,
5328     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5329                                   const Expr *, const Expr *)> &RedOpGen,
5330     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5331     const Expr *UpExpr = nullptr) {
5332   // Perform element-by-element initialization.
5333   QualType ElementTy;
5334   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5335   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5336 
5337   // Drill down to the base element type on both arrays.
5338   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5339   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5340 
5341   llvm::Value *RHSBegin = RHSAddr.getPointer();
5342   llvm::Value *LHSBegin = LHSAddr.getPointer();
5343   // Cast from pointer to array type to pointer to single element.
5344   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5345   // The basic structure here is a while-do loop.
5346   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5347   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5348   llvm::Value *IsEmpty =
5349       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5350   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5351 
5352   // Enter the loop body, making that address the current address.
5353   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5354   CGF.EmitBlock(BodyBB);
5355 
5356   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5357 
5358   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5359       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5360   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5361   Address RHSElementCurrent =
5362       Address(RHSElementPHI,
5363               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5364 
5365   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5366       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5367   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5368   Address LHSElementCurrent =
5369       Address(LHSElementPHI,
5370               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5371 
5372   // Emit copy.
5373   CodeGenFunction::OMPPrivateScope Scope(CGF);
5374   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5375   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5376   Scope.Privatize();
5377   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5378   Scope.ForceCleanup();
5379 
5380   // Shift the address forward by one element.
5381   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5382       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5383   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5384       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5385   // Check whether we've reached the end.
5386   llvm::Value *Done =
5387       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5388   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5389   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5390   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5391 
5392   // Done.
5393   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5394 }
5395 
5396 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5397 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5398 /// UDR combiner function.
5399 static void emitReductionCombiner(CodeGenFunction &CGF,
5400                                   const Expr *ReductionOp) {
5401   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5402     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5403       if (const auto *DRE =
5404               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5405         if (const auto *DRD =
5406                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5407           std::pair<llvm::Function *, llvm::Function *> Reduction =
5408               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5409           RValue Func = RValue::get(Reduction.first);
5410           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5411           CGF.EmitIgnoredExpr(ReductionOp);
5412           return;
5413         }
5414   CGF.EmitIgnoredExpr(ReductionOp);
5415 }
5416 
5417 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5418     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5419     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5420     ArrayRef<const Expr *> ReductionOps) {
5421   ASTContext &C = CGM.getContext();
5422 
5423   // void reduction_func(void *LHSArg, void *RHSArg);
5424   FunctionArgList Args;
5425   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5426                            ImplicitParamDecl::Other);
5427   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5428                            ImplicitParamDecl::Other);
5429   Args.push_back(&LHSArg);
5430   Args.push_back(&RHSArg);
5431   const auto &CGFI =
5432       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5433   std::string Name = getName({"omp", "reduction", "reduction_func"});
5434   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5435                                     llvm::GlobalValue::InternalLinkage, Name,
5436                                     &CGM.getModule());
5437   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5438   Fn->setDoesNotRecurse();
5439   CodeGenFunction CGF(CGM);
5440   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5441 
5442   // Dst = (void*[n])(LHSArg);
5443   // Src = (void*[n])(RHSArg);
5444   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5445       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5446       ArgsType), CGF.getPointerAlign());
5447   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5448       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5449       ArgsType), CGF.getPointerAlign());
5450 
5451   //  ...
5452   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5453   //  ...
5454   CodeGenFunction::OMPPrivateScope Scope(CGF);
5455   auto IPriv = Privates.begin();
5456   unsigned Idx = 0;
5457   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5458     const auto *RHSVar =
5459         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5460     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5461       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5462     });
5463     const auto *LHSVar =
5464         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5465     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5466       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5467     });
5468     QualType PrivTy = (*IPriv)->getType();
5469     if (PrivTy->isVariablyModifiedType()) {
5470       // Get array size and emit VLA type.
5471       ++Idx;
5472       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5473       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5474       const VariableArrayType *VLA =
5475           CGF.getContext().getAsVariableArrayType(PrivTy);
5476       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5477       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5478           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5479       CGF.EmitVariablyModifiedType(PrivTy);
5480     }
5481   }
5482   Scope.Privatize();
5483   IPriv = Privates.begin();
5484   auto ILHS = LHSExprs.begin();
5485   auto IRHS = RHSExprs.begin();
5486   for (const Expr *E : ReductionOps) {
5487     if ((*IPriv)->getType()->isArrayType()) {
5488       // Emit reduction for array section.
5489       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5490       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5491       EmitOMPAggregateReduction(
5492           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5493           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5494             emitReductionCombiner(CGF, E);
5495           });
5496     } else {
5497       // Emit reduction for array subscript or single variable.
5498       emitReductionCombiner(CGF, E);
5499     }
5500     ++IPriv;
5501     ++ILHS;
5502     ++IRHS;
5503   }
5504   Scope.ForceCleanup();
5505   CGF.FinishFunction();
5506   return Fn;
5507 }
5508 
5509 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5510                                                   const Expr *ReductionOp,
5511                                                   const Expr *PrivateRef,
5512                                                   const DeclRefExpr *LHS,
5513                                                   const DeclRefExpr *RHS) {
5514   if (PrivateRef->getType()->isArrayType()) {
5515     // Emit reduction for array section.
5516     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5517     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5518     EmitOMPAggregateReduction(
5519         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5520         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5521           emitReductionCombiner(CGF, ReductionOp);
5522         });
5523   } else {
5524     // Emit reduction for array subscript or single variable.
5525     emitReductionCombiner(CGF, ReductionOp);
5526   }
5527 }
5528 
/// Emits the code that folds the private reduction copies into the reduction
/// items. \p Privates, \p LHSExprs, \p RHSExprs and \p ReductionOps are
/// parallel lists with one entry per reduction item.
///
/// If Options.SimpleReduction is set, only the combiners are emitted inline.
/// Otherwise the function builds a list of pointers to the RHS items, emits
/// the reduce_func() helper, calls __kmpc_reduce{_nowait} and switches on the
/// result: case 1 emits the plain combiners followed by
/// __kmpc_end_reduce{_nowait}; case 2 emits atomic updates (or a critical
/// region when the combiner has no 'x = <expr>' form), followed by
/// __kmpc_end_reduce only when Options.WithNowait is false.
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls at all: just emit every combiner in order.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one slot per reduction item plus one extra slot for every
  // variably modified private.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I after every size slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Store the address of the RHS item, cast to void *.
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is encoded as a pointer value (inttoptr) in the
      // slot that follows the item itself.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note: <n> is the number of reduction items (RHSExprs.size()), which is
  // smaller than the number of RedList slots when size slots were reserved
  // above; sizeof(RedList) covers all slots.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the plain (non-atomic) combiners.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action carries no enter call (nullptr/None) and the end-reduce runtime
  // function with EndArgs as its exit call.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2: one atomic update (or critical region) per
  // reduction item.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr: the updated lvalue; UpExpr: the whole update expression;
      // EExpr: the right operand of the binary operator inside UpExpr.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is the initial value and stays in place when no binary
      // operator is found in the update expression below.
      BinaryOperatorKind BO = BO_Comma;
      // Only a combiner of the form 'x = <expr>' is a candidate for an atomic
      // update. Note that the pointer 'BO' declared in this condition shadows
      // the opcode 'BO' above within the if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits 'x = UpExpr' through EmitOMPAtomicSimpleUpdateExpr with
        // monotonic ordering. The callback passed to it stores the value it
        // receives for x into a temporary, remaps VD to that temporary and
        // evaluates UpExpr against it.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The combiner is not an assignment, so it is wrapped in a critical
        // region named via getName({"atomic_reduction"}).
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    // The region is emitted inside this branch, while Action is in scope.
    AtomicRCG(CGF);
  } else {
    // With nowait no end-reduce call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5835 
5836 /// Generates unique name for artificial threadprivate variables.
5837 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5838 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5839                                       const Expr *Ref) {
5840   SmallString<256> Buffer;
5841   llvm::raw_svector_ostream Out(Buffer);
5842   const clang::DeclRefExpr *DE;
5843   const VarDecl *D = ::getBaseDecl(Ref, DE);
5844   if (!D)
5845     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5846   D = D->getCanonicalDecl();
5847   std::string Name = CGM.getOpenMPRuntime().getName(
5848       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5849   Out << Prefix << Name << "_"
5850       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5851   return std::string(Out.str());
5852 }
5853 
5854 /// Emits reduction initializer function:
5855 /// \code
5856 /// void @.red_init(void* %arg, void* %orig) {
5857 /// %0 = bitcast void* %arg to <type>*
5858 /// store <type> <init>, <type>* %0
5859 /// ret void
5860 /// }
5861 /// \endcode
5862 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5863                                            SourceLocation Loc,
5864                                            ReductionCodeGen &RCG, unsigned N) {
5865   ASTContext &C = CGM.getContext();
5866   QualType VoidPtrTy = C.VoidPtrTy;
5867   VoidPtrTy.addRestrict();
5868   FunctionArgList Args;
5869   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5870                           ImplicitParamDecl::Other);
5871   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5872                               ImplicitParamDecl::Other);
5873   Args.emplace_back(&Param);
5874   Args.emplace_back(&ParamOrig);
5875   const auto &FnInfo =
5876       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5877   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5878   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5879   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5880                                     Name, &CGM.getModule());
5881   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5882   Fn->setDoesNotRecurse();
5883   CodeGenFunction CGF(CGM);
5884   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5885   Address PrivateAddr = CGF.EmitLoadOfPointer(
5886       CGF.GetAddrOfLocalVar(&Param),
5887       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5888   llvm::Value *Size = nullptr;
5889   // If the size of the reduction item is non-constant, load it from global
5890   // threadprivate variable.
5891   if (RCG.getSizes(N).second) {
5892     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5893         CGF, CGM.getContext().getSizeType(),
5894         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5895     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5896                                 CGM.getContext().getSizeType(), Loc);
5897   }
5898   RCG.emitAggregateType(CGF, N, Size);
5899   LValue OrigLVal;
5900   // If initializer uses initializer from declare reduction construct, emit a
5901   // pointer to the address of the original reduction item (reuired by reduction
5902   // initializer)
5903   if (RCG.usesReductionInitializer(N)) {
5904     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5905     SharedAddr = CGF.EmitLoadOfPointer(
5906         SharedAddr,
5907         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5908     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5909   } else {
5910     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5911         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5912         CGM.getContext().VoidPtrTy);
5913   }
5914   // Emit the initializer:
5915   // %0 = bitcast void* %arg to <type>*
5916   // store <type> <init>, <type>* %0
5917   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5918                          [](CodeGenFunction &) { return false; });
5919   CGF.FinishFunction();
5920   return Fn;
5921 }
5922 
5923 /// Emits reduction combiner function:
5924 /// \code
5925 /// void @.red_comb(void* %arg0, void* %arg1) {
5926 /// %lhs = bitcast void* %arg0 to <type>*
5927 /// %rhs = bitcast void* %arg1 to <type>*
5928 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5929 /// store <type> %2, <type>* %lhs
5930 /// ret void
5931 /// }
5932 /// \endcode
5933 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5934                                            SourceLocation Loc,
5935                                            ReductionCodeGen &RCG, unsigned N,
5936                                            const Expr *ReductionOp,
5937                                            const Expr *LHS, const Expr *RHS,
5938                                            const Expr *PrivateRef) {
5939   ASTContext &C = CGM.getContext();
5940   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5941   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5942   FunctionArgList Args;
5943   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5944                                C.VoidPtrTy, ImplicitParamDecl::Other);
5945   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5946                             ImplicitParamDecl::Other);
5947   Args.emplace_back(&ParamInOut);
5948   Args.emplace_back(&ParamIn);
5949   const auto &FnInfo =
5950       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5951   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5952   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5953   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5954                                     Name, &CGM.getModule());
5955   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5956   Fn->setDoesNotRecurse();
5957   CodeGenFunction CGF(CGM);
5958   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5959   llvm::Value *Size = nullptr;
5960   // If the size of the reduction item is non-constant, load it from global
5961   // threadprivate variable.
5962   if (RCG.getSizes(N).second) {
5963     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5964         CGF, CGM.getContext().getSizeType(),
5965         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5966     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5967                                 CGM.getContext().getSizeType(), Loc);
5968   }
5969   RCG.emitAggregateType(CGF, N, Size);
5970   // Remap lhs and rhs variables to the addresses of the function arguments.
5971   // %lhs = bitcast void* %arg0 to <type>*
5972   // %rhs = bitcast void* %arg1 to <type>*
5973   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5974   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5975     // Pull out the pointer to the variable.
5976     Address PtrAddr = CGF.EmitLoadOfPointer(
5977         CGF.GetAddrOfLocalVar(&ParamInOut),
5978         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5979     return CGF.Builder.CreateElementBitCast(
5980         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5981   });
5982   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5983     // Pull out the pointer to the variable.
5984     Address PtrAddr = CGF.EmitLoadOfPointer(
5985         CGF.GetAddrOfLocalVar(&ParamIn),
5986         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5987     return CGF.Builder.CreateElementBitCast(
5988         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5989   });
5990   PrivateScope.Privatize();
5991   // Emit the combiner body:
5992   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5993   // store <type> %2, <type>* %lhs
5994   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5995       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5996       cast<DeclRefExpr>(RHS));
5997   CGF.FinishFunction();
5998   return Fn;
5999 }
6000 
6001 /// Emits reduction finalizer function:
6002 /// \code
6003 /// void @.red_fini(void* %arg) {
6004 /// %0 = bitcast void* %arg to <type>*
6005 /// <destroy>(<type>* %0)
6006 /// ret void
6007 /// }
6008 /// \endcode
6009 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6010                                            SourceLocation Loc,
6011                                            ReductionCodeGen &RCG, unsigned N) {
6012   if (!RCG.needCleanups(N))
6013     return nullptr;
6014   ASTContext &C = CGM.getContext();
6015   FunctionArgList Args;
6016   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6017                           ImplicitParamDecl::Other);
6018   Args.emplace_back(&Param);
6019   const auto &FnInfo =
6020       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6021   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6022   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6023   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6024                                     Name, &CGM.getModule());
6025   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6026   Fn->setDoesNotRecurse();
6027   CodeGenFunction CGF(CGM);
6028   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6029   Address PrivateAddr = CGF.EmitLoadOfPointer(
6030       CGF.GetAddrOfLocalVar(&Param),
6031       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6032   llvm::Value *Size = nullptr;
6033   // If the size of the reduction item is non-constant, load it from global
6034   // threadprivate variable.
6035   if (RCG.getSizes(N).second) {
6036     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6037         CGF, CGM.getContext().getSizeType(),
6038         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6039     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6040                                 CGM.getContext().getSizeType(), Loc);
6041   }
6042   RCG.emitAggregateType(CGF, N, Size);
6043   // Emit the finalizer body:
6044   // <destroy>(<type>* %0)
6045   RCG.emitCleanups(CGF, N, PrivateAddr);
6046   CGF.FinishFunction(Loc);
6047   return Fn;
6048 }
6049 
6050 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6051     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6052     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6053   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6054     return nullptr;
6055 
6056   // Build typedef struct:
6057   // kmp_taskred_input {
6058   //   void *reduce_shar; // shared reduction item
6059   //   void *reduce_orig; // original reduction item used for initialization
6060   //   size_t reduce_size; // size of data item
6061   //   void *reduce_init; // data initialization routine
6062   //   void *reduce_fini; // data finalization routine
6063   //   void *reduce_comb; // data combiner routine
6064   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6065   // } kmp_taskred_input_t;
6066   ASTContext &C = CGM.getContext();
6067   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6068   RD->startDefinition();
6069   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6070   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6071   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6072   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6073   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6074   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6075   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6076       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6077   RD->completeDefinition();
6078   QualType RDType = C.getRecordType(RD);
6079   unsigned Size = Data.ReductionVars.size();
6080   llvm::APInt ArraySize(/*numBits=*/64, Size);
6081   QualType ArrayRDType = C.getConstantArrayType(
6082       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6083   // kmp_task_red_input_t .rd_input.[Size];
6084   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6085   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6086                        Data.ReductionCopies, Data.ReductionOps);
6087   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6088     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6089     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6090                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6091     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6092         TaskRedInput.getPointer(), Idxs,
6093         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6094         ".rd_input.gep.");
6095     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6096     // ElemLVal.reduce_shar = &Shareds[Cnt];
6097     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6098     RCG.emitSharedOrigLValue(CGF, Cnt);
6099     llvm::Value *CastedShared =
6100         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6101     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6102     // ElemLVal.reduce_orig = &Origs[Cnt];
6103     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6104     llvm::Value *CastedOrig =
6105         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6106     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6107     RCG.emitAggregateType(CGF, Cnt);
6108     llvm::Value *SizeValInChars;
6109     llvm::Value *SizeVal;
6110     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6111     // We use delayed creation/initialization for VLAs and array sections. It is
6112     // required because runtime does not provide the way to pass the sizes of
6113     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6114     // threadprivate global variables are used to store these values and use
6115     // them in the functions.
6116     bool DelayedCreation = !!SizeVal;
6117     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6118                                                /*isSigned=*/false);
6119     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6120     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6121     // ElemLVal.reduce_init = init;
6122     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6123     llvm::Value *InitAddr =
6124         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6125     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6126     // ElemLVal.reduce_fini = fini;
6127     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6128     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6129     llvm::Value *FiniAddr = Fini
6130                                 ? CGF.EmitCastToVoidPtr(Fini)
6131                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6132     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6133     // ElemLVal.reduce_comb = comb;
6134     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6135     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6136         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6137         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6138     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6139     // ElemLVal.flags = 0;
6140     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6141     if (DelayedCreation) {
6142       CGF.EmitStoreOfScalar(
6143           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6144           FlagsLVal);
6145     } else
6146       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6147                                  FlagsLVal.getType());
6148   }
6149   if (Data.IsReductionWithTaskMod) {
6150     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6151     // is_ws, int num, void *data);
6152     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6153     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6154                                                   CGM.IntTy, /*isSigned=*/true);
6155     llvm::Value *Args[] = {
6156         IdentTLoc, GTid,
6157         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6158                                /*isSigned=*/true),
6159         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6160         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6161             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6162     return CGF.EmitRuntimeCall(
6163         OMPBuilder.getOrCreateRuntimeFunction(
6164             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6165         Args);
6166   }
6167   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6168   llvm::Value *Args[] = {
6169       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6170                                 /*isSigned=*/true),
6171       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6172       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6173                                                       CGM.VoidPtrTy)};
6174   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6175                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6176                              Args);
6177 }
6178 
6179 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6180                                             SourceLocation Loc,
6181                                             bool IsWorksharingReduction) {
6182   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6183   // is_ws, int num, void *data);
6184   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6185   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6186                                                 CGM.IntTy, /*isSigned=*/true);
6187   llvm::Value *Args[] = {IdentTLoc, GTid,
6188                          llvm::ConstantInt::get(CGM.IntTy,
6189                                                 IsWorksharingReduction ? 1 : 0,
6190                                                 /*isSigned=*/true)};
6191   (void)CGF.EmitRuntimeCall(
6192       OMPBuilder.getOrCreateRuntimeFunction(
6193           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6194       Args);
6195 }
6196 
6197 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6198                                               SourceLocation Loc,
6199                                               ReductionCodeGen &RCG,
6200                                               unsigned N) {
6201   auto Sizes = RCG.getSizes(N);
6202   // Emit threadprivate global variable if the type is non-constant
6203   // (Sizes.second = nullptr).
6204   if (Sizes.second) {
6205     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6206                                                      /*isSigned=*/false);
6207     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6208         CGF, CGM.getContext().getSizeType(),
6209         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6210     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6211   }
6212 }
6213 
6214 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6215                                               SourceLocation Loc,
6216                                               llvm::Value *ReductionsPtr,
6217                                               LValue SharedLVal) {
6218   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6219   // *d);
6220   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6221                                                    CGM.IntTy,
6222                                                    /*isSigned=*/true),
6223                          ReductionsPtr,
6224                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6225                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6226   return Address(
6227       CGF.EmitRuntimeCall(
6228           OMPBuilder.getOrCreateRuntimeFunction(
6229               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6230           Args),
6231       SharedLVal.getAlignment());
6232 }
6233 
6234 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6235                                        SourceLocation Loc) {
6236   if (!CGF.HaveInsertPoint())
6237     return;
6238 
6239   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6240     OMPBuilder.createTaskwait(CGF.Builder);
6241   } else {
6242     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6243     // global_tid);
6244     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6245     // Ignore return result until untied tasks are supported.
6246     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6247                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6248                         Args);
6249   }
6250 
6251   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6252     Region->emitUntiedSwitch(CGF);
6253 }
6254 
6255 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6256                                            OpenMPDirectiveKind InnerKind,
6257                                            const RegionCodeGenTy &CodeGen,
6258                                            bool HasCancel) {
6259   if (!CGF.HaveInsertPoint())
6260     return;
6261   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6262                                  InnerKind != OMPD_critical &&
6263                                      InnerKind != OMPD_master &&
6264                                      InnerKind != OMPD_masked);
6265   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6266 }
6267 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The numeric
/// values are emitted into the generated code as-is.
/// NOTE(review): presumably these must match the runtime's own cancel kind
/// constants - confirm before changing any value.
enum RTCancelKind {
  /// No cancellation region kind.
  CancelNoreq = 0,
  /// Used for an OMPD_parallel region.
  CancelParallel = 1,
  /// Used for an OMPD_for region.
  CancelLoop = 2,
  /// Used for an OMPD_sections region.
  CancelSections = 3,
  /// Used for an OMPD_taskgroup region.
  CancelTaskgroup = 4
};
} // anonymous namespace
6277 
6278 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6279   RTCancelKind CancelKind = CancelNoreq;
6280   if (CancelRegion == OMPD_parallel)
6281     CancelKind = CancelParallel;
6282   else if (CancelRegion == OMPD_for)
6283     CancelKind = CancelLoop;
6284   else if (CancelRegion == OMPD_sections)
6285     CancelKind = CancelSections;
6286   else {
6287     assert(CancelRegion == OMPD_taskgroup);
6288     CancelKind = CancelTaskgroup;
6289   }
6290   return CancelKind;
6291 }
6292 
6293 void CGOpenMPRuntime::emitCancellationPointCall(
6294     CodeGenFunction &CGF, SourceLocation Loc,
6295     OpenMPDirectiveKind CancelRegion) {
6296   if (!CGF.HaveInsertPoint())
6297     return;
6298   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6299   // global_tid, kmp_int32 cncl_kind);
6300   if (auto *OMPRegionInfo =
6301           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6302     // For 'cancellation point taskgroup', the task region info may not have a
6303     // cancel. This may instead happen in another adjacent task.
6304     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6305       llvm::Value *Args[] = {
6306           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6307           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6308       // Ignore return result until untied tasks are supported.
6309       llvm::Value *Result = CGF.EmitRuntimeCall(
6310           OMPBuilder.getOrCreateRuntimeFunction(
6311               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6312           Args);
6313       // if (__kmpc_cancellationpoint()) {
6314       //   exit from construct;
6315       // }
6316       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6317       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6318       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6319       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6320       CGF.EmitBlock(ExitBB);
6321       // exit from construct;
6322       CodeGenFunction::JumpDest CancelDest =
6323           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6324       CGF.EmitBranchThroughCleanup(CancelDest);
6325       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6326     }
6327   }
6328 }
6329 
6330 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6331                                      const Expr *IfCond,
6332                                      OpenMPDirectiveKind CancelRegion) {
6333   if (!CGF.HaveInsertPoint())
6334     return;
6335   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6336   // kmp_int32 cncl_kind);
6337   auto &M = CGM.getModule();
6338   if (auto *OMPRegionInfo =
6339           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6340     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6341                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6342       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6343       llvm::Value *Args[] = {
6344           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6345           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6346       // Ignore return result until untied tasks are supported.
6347       llvm::Value *Result = CGF.EmitRuntimeCall(
6348           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6349       // if (__kmpc_cancel()) {
6350       //   exit from construct;
6351       // }
6352       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6353       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6354       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6355       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6356       CGF.EmitBlock(ExitBB);
6357       // exit from construct;
6358       CodeGenFunction::JumpDest CancelDest =
6359           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6360       CGF.EmitBranchThroughCleanup(CancelDest);
6361       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6362     };
6363     if (IfCond) {
6364       emitIfClause(CGF, IfCond, ThenGen,
6365                    [](CodeGenFunction &, PrePostActionTy &) {});
6366     } else {
6367       RegionCodeGenTy ThenRCG(ThenGen);
6368       ThenRCG(CGF);
6369     }
6370   }
6371 }
6372 
6373 namespace {
6374 /// Cleanup action for uses_allocators support.
6375 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6376   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6377 
6378 public:
6379   OMPUsesAllocatorsActionTy(
6380       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6381       : Allocators(Allocators) {}
6382   void Enter(CodeGenFunction &CGF) override {
6383     if (!CGF.HaveInsertPoint())
6384       return;
6385     for (const auto &AllocatorData : Allocators) {
6386       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6387           CGF, AllocatorData.first, AllocatorData.second);
6388     }
6389   }
6390   void Exit(CodeGenFunction &CGF) override {
6391     if (!CGF.HaveInsertPoint())
6392       return;
6393     for (const auto &AllocatorData : Allocators) {
6394       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6395                                                         AllocatorData.first);
6396     }
6397   }
6398 };
6399 } // namespace
6400 
6401 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6402     const OMPExecutableDirective &D, StringRef ParentName,
6403     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6404     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6405   assert(!ParentName.empty() && "Invalid target region parent name!");
6406   HasEmittedTargetRegion = true;
6407   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6408   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6409     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6410       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6411       if (!D.AllocatorTraits)
6412         continue;
6413       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6414     }
6415   }
6416   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6417   CodeGen.setAction(UsesAllocatorAction);
6418   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6419                                    IsOffloadEntry, CodeGen);
6420 }
6421 
6422 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6423                                              const Expr *Allocator,
6424                                              const Expr *AllocatorTraits) {
6425   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6426   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6427   // Use default memspace handle.
6428   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6429   llvm::Value *NumTraits = llvm::ConstantInt::get(
6430       CGF.IntTy, cast<ConstantArrayType>(
6431                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6432                      ->getSize()
6433                      .getLimitedValue());
6434   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6435   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6436       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6437   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6438                                            AllocatorTraitsLVal.getBaseInfo(),
6439                                            AllocatorTraitsLVal.getTBAAInfo());
6440   llvm::Value *Traits =
6441       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6442 
6443   llvm::Value *AllocatorVal =
6444       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6445                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6446                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6447   // Store to allocator.
6448   CGF.EmitVarDecl(*cast<VarDecl>(
6449       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6450   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6451   AllocatorVal =
6452       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6453                                Allocator->getType(), Allocator->getExprLoc());
6454   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6455 }
6456 
6457 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6458                                              const Expr *Allocator) {
6459   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6460   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6461   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6462   llvm::Value *AllocatorVal =
6463       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6464   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6465                                           CGF.getContext().VoidPtrTy,
6466                                           Allocator->getExprLoc());
6467   (void)CGF.EmitRuntimeCall(
6468       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6469                                             OMPRTL___kmpc_destroy_allocator),
6470       {ThreadId, AllocatorVal});
6471 }
6472 
6473 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6474     const OMPExecutableDirective &D, StringRef ParentName,
6475     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6476     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6477   // Create a unique name for the entry function using the source location
6478   // information of the current target region. The name will be something like:
6479   //
6480   // __omp_offloading_DD_FFFF_PP_lBB
6481   //
6482   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6483   // mangled name of the function that encloses the target region and BB is the
6484   // line number of the target region.
6485 
6486   unsigned DeviceID;
6487   unsigned FileID;
6488   unsigned Line;
6489   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6490                            Line);
6491   SmallString<64> EntryFnName;
6492   {
6493     llvm::raw_svector_ostream OS(EntryFnName);
6494     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6495        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6496   }
6497 
6498   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6499 
6500   CodeGenFunction CGF(CGM, true);
6501   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6502   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6503 
6504   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6505 
6506   // If this target outline function is not an offload entry, we don't need to
6507   // register it.
6508   if (!IsOffloadEntry)
6509     return;
6510 
6511   // The target region ID is used by the runtime library to identify the current
6512   // target region, so it only has to be unique and not necessarily point to
6513   // anything. It could be the pointer to the outlined function that implements
6514   // the target region, but we aren't using that so that the compiler doesn't
6515   // need to keep that, and could therefore inline the host function if proven
6516   // worthwhile during optimization. In the other hand, if emitting code for the
6517   // device, the ID has to be the function address so that it can retrieved from
6518   // the offloading entry and launched by the runtime library. We also mark the
6519   // outlined function to have external linkage in case we are emitting code for
6520   // the device, because these functions will be entry points to the device.
6521 
6522   if (CGM.getLangOpts().OpenMPIsDevice) {
6523     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6524     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6525     OutlinedFn->setDSOLocal(false);
6526     if (CGM.getTriple().isAMDGCN())
6527       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6528   } else {
6529     std::string Name = getName({EntryFnName, "region_id"});
6530     OutlinedFnID = new llvm::GlobalVariable(
6531         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6532         llvm::GlobalValue::WeakAnyLinkage,
6533         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6534   }
6535 
6536   // Register the information for the entry associated with this target region.
6537   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6538       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6539       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6540 }
6541 
6542 /// Checks if the expression is constant or does not have non-trivial function
6543 /// calls.
6544 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6545   // We can skip constant expressions.
6546   // We can skip expressions with trivial calls or simple expressions.
6547   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6548           !E->hasNonTrivialCall(Ctx)) &&
6549          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6550 }
6551 
6552 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6553                                                     const Stmt *Body) {
6554   const Stmt *Child = Body->IgnoreContainers();
6555   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6556     Child = nullptr;
6557     for (const Stmt *S : C->body()) {
6558       if (const auto *E = dyn_cast<Expr>(S)) {
6559         if (isTrivial(Ctx, E))
6560           continue;
6561       }
6562       // Some of the statements can be ignored.
6563       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6564           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6565         continue;
6566       // Analyze declarations.
6567       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6568         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6569               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6570                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6571                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6572                   isa<UsingDirectiveDecl>(D) ||
6573                   isa<OMPDeclareReductionDecl>(D) ||
6574                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6575                 return true;
6576               const auto *VD = dyn_cast<VarDecl>(D);
6577               if (!VD)
6578                 return false;
6579               return VD->isConstexpr() ||
6580                      ((VD->getType().isTrivialType(Ctx) ||
6581                        VD->getType()->isReferenceType()) &&
6582                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6583             }))
6584           continue;
6585       }
6586       // Found multiple children - cannot get the one child only.
6587       if (Child)
6588         return nullptr;
6589       Child = S;
6590     }
6591     if (Child)
6592       Child = Child->IgnoreContainers();
6593   }
6594   return Child;
6595 }
6596 
6597 /// Emit the number of teams for a target directive.  Inspect the num_teams
6598 /// clause associated with a teams construct combined or closely nested
6599 /// with the target directive.
6600 ///
6601 /// Emit a team of size one for directives such as 'target parallel' that
6602 /// have no associated teams construct.
6603 ///
6604 /// Otherwise, return nullptr.
6605 static llvm::Value *
6606 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6607                                const OMPExecutableDirective &D) {
6608   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6609          "Clauses associated with the teams directive expected to be emitted "
6610          "only for the host!");
6611   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6612   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6613          "Expected target-based executable directive.");
6614   CGBuilderTy &Bld = CGF.Builder;
6615   switch (DirectiveKind) {
6616   case OMPD_target: {
6617     const auto *CS = D.getInnermostCapturedStmt();
6618     const auto *Body =
6619         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6620     const Stmt *ChildStmt =
6621         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6622     if (const auto *NestedDir =
6623             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6624       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6625         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6626           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6627           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6628           const Expr *NumTeams =
6629               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6630           llvm::Value *NumTeamsVal =
6631               CGF.EmitScalarExpr(NumTeams,
6632                                  /*IgnoreResultAssign*/ true);
6633           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6634                                    /*isSigned=*/true);
6635         }
6636         return Bld.getInt32(0);
6637       }
6638       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6639           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6640         return Bld.getInt32(1);
6641       return Bld.getInt32(0);
6642     }
6643     return nullptr;
6644   }
6645   case OMPD_target_teams:
6646   case OMPD_target_teams_distribute:
6647   case OMPD_target_teams_distribute_simd:
6648   case OMPD_target_teams_distribute_parallel_for:
6649   case OMPD_target_teams_distribute_parallel_for_simd: {
6650     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6651       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6652       const Expr *NumTeams =
6653           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6654       llvm::Value *NumTeamsVal =
6655           CGF.EmitScalarExpr(NumTeams,
6656                              /*IgnoreResultAssign*/ true);
6657       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6658                                /*isSigned=*/true);
6659     }
6660     return Bld.getInt32(0);
6661   }
6662   case OMPD_target_parallel:
6663   case OMPD_target_parallel_for:
6664   case OMPD_target_parallel_for_simd:
6665   case OMPD_target_simd:
6666     return Bld.getInt32(1);
6667   case OMPD_parallel:
6668   case OMPD_for:
6669   case OMPD_parallel_for:
6670   case OMPD_parallel_master:
6671   case OMPD_parallel_sections:
6672   case OMPD_for_simd:
6673   case OMPD_parallel_for_simd:
6674   case OMPD_cancel:
6675   case OMPD_cancellation_point:
6676   case OMPD_ordered:
6677   case OMPD_threadprivate:
6678   case OMPD_allocate:
6679   case OMPD_task:
6680   case OMPD_simd:
6681   case OMPD_tile:
6682   case OMPD_sections:
6683   case OMPD_section:
6684   case OMPD_single:
6685   case OMPD_master:
6686   case OMPD_critical:
6687   case OMPD_taskyield:
6688   case OMPD_barrier:
6689   case OMPD_taskwait:
6690   case OMPD_taskgroup:
6691   case OMPD_atomic:
6692   case OMPD_flush:
6693   case OMPD_depobj:
6694   case OMPD_scan:
6695   case OMPD_teams:
6696   case OMPD_target_data:
6697   case OMPD_target_exit_data:
6698   case OMPD_target_enter_data:
6699   case OMPD_distribute:
6700   case OMPD_distribute_simd:
6701   case OMPD_distribute_parallel_for:
6702   case OMPD_distribute_parallel_for_simd:
6703   case OMPD_teams_distribute:
6704   case OMPD_teams_distribute_simd:
6705   case OMPD_teams_distribute_parallel_for:
6706   case OMPD_teams_distribute_parallel_for_simd:
6707   case OMPD_target_update:
6708   case OMPD_declare_simd:
6709   case OMPD_declare_variant:
6710   case OMPD_begin_declare_variant:
6711   case OMPD_end_declare_variant:
6712   case OMPD_declare_target:
6713   case OMPD_end_declare_target:
6714   case OMPD_declare_reduction:
6715   case OMPD_declare_mapper:
6716   case OMPD_taskloop:
6717   case OMPD_taskloop_simd:
6718   case OMPD_master_taskloop:
6719   case OMPD_master_taskloop_simd:
6720   case OMPD_parallel_master_taskloop:
6721   case OMPD_parallel_master_taskloop_simd:
6722   case OMPD_requires:
6723   case OMPD_unknown:
6724     break;
6725   default:
6726     break;
6727   }
6728   llvm_unreachable("Unexpected directive kind.");
6729 }
6730 
6731 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6732                                   llvm::Value *DefaultThreadLimitVal) {
6733   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6734       CGF.getContext(), CS->getCapturedStmt());
6735   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6736     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6737       llvm::Value *NumThreads = nullptr;
6738       llvm::Value *CondVal = nullptr;
6739       // Handle if clause. If if clause present, the number of threads is
6740       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6741       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6742         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6743         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6744         const OMPIfClause *IfClause = nullptr;
6745         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6746           if (C->getNameModifier() == OMPD_unknown ||
6747               C->getNameModifier() == OMPD_parallel) {
6748             IfClause = C;
6749             break;
6750           }
6751         }
6752         if (IfClause) {
6753           const Expr *Cond = IfClause->getCondition();
6754           bool Result;
6755           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6756             if (!Result)
6757               return CGF.Builder.getInt32(1);
6758           } else {
6759             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6760             if (const auto *PreInit =
6761                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6762               for (const auto *I : PreInit->decls()) {
6763                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6764                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6765                 } else {
6766                   CodeGenFunction::AutoVarEmission Emission =
6767                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6768                   CGF.EmitAutoVarCleanups(Emission);
6769                 }
6770               }
6771             }
6772             CondVal = CGF.EvaluateExprAsBool(Cond);
6773           }
6774         }
6775       }
6776       // Check the value of num_threads clause iff if clause was not specified
6777       // or is not evaluated to false.
6778       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6779         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6780         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6781         const auto *NumThreadsClause =
6782             Dir->getSingleClause<OMPNumThreadsClause>();
6783         CodeGenFunction::LexicalScope Scope(
6784             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6785         if (const auto *PreInit =
6786                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6787           for (const auto *I : PreInit->decls()) {
6788             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6789               CGF.EmitVarDecl(cast<VarDecl>(*I));
6790             } else {
6791               CodeGenFunction::AutoVarEmission Emission =
6792                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6793               CGF.EmitAutoVarCleanups(Emission);
6794             }
6795           }
6796         }
6797         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6798         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6799                                                /*isSigned=*/false);
6800         if (DefaultThreadLimitVal)
6801           NumThreads = CGF.Builder.CreateSelect(
6802               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6803               DefaultThreadLimitVal, NumThreads);
6804       } else {
6805         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6806                                            : CGF.Builder.getInt32(0);
6807       }
6808       // Process condition of the if clause.
6809       if (CondVal) {
6810         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6811                                               CGF.Builder.getInt32(1));
6812       }
6813       return NumThreads;
6814     }
6815     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6816       return CGF.Builder.getInt32(1);
6817     return DefaultThreadLimitVal;
6818   }
6819   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6820                                : CGF.Builder.getInt32(0);
6821 }
6822 
6823 /// Emit the number of threads for a target directive.  Inspect the
6824 /// thread_limit clause associated with a teams construct combined or closely
6825 /// nested with the target directive.
6826 ///
6827 /// Emit the num_threads clause for directives such as 'target parallel' that
6828 /// have no associated teams construct.
6829 ///
6830 /// Otherwise, return nullptr.
6831 static llvm::Value *
6832 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6833                                  const OMPExecutableDirective &D) {
6834   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6835          "Clauses associated with the teams directive expected to be emitted "
6836          "only for the host!");
6837   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6838   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6839          "Expected target-based executable directive.");
6840   CGBuilderTy &Bld = CGF.Builder;
6841   llvm::Value *ThreadLimitVal = nullptr;
6842   llvm::Value *NumThreadsVal = nullptr;
6843   switch (DirectiveKind) {
6844   case OMPD_target: {
6845     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6846     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6847       return NumThreads;
6848     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6849         CGF.getContext(), CS->getCapturedStmt());
6850     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6851       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6852         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6853         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6854         const auto *ThreadLimitClause =
6855             Dir->getSingleClause<OMPThreadLimitClause>();
6856         CodeGenFunction::LexicalScope Scope(
6857             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6858         if (const auto *PreInit =
6859                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6860           for (const auto *I : PreInit->decls()) {
6861             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6862               CGF.EmitVarDecl(cast<VarDecl>(*I));
6863             } else {
6864               CodeGenFunction::AutoVarEmission Emission =
6865                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6866               CGF.EmitAutoVarCleanups(Emission);
6867             }
6868           }
6869         }
6870         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6871             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6872         ThreadLimitVal =
6873             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6874       }
6875       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6876           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6877         CS = Dir->getInnermostCapturedStmt();
6878         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6879             CGF.getContext(), CS->getCapturedStmt());
6880         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6881       }
6882       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6883           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6884         CS = Dir->getInnermostCapturedStmt();
6885         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6886           return NumThreads;
6887       }
6888       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6889         return Bld.getInt32(1);
6890     }
6891     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6892   }
6893   case OMPD_target_teams: {
6894     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6895       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6896       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6897       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6898           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6899       ThreadLimitVal =
6900           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6901     }
6902     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6903     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6904       return NumThreads;
6905     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6906         CGF.getContext(), CS->getCapturedStmt());
6907     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6908       if (Dir->getDirectiveKind() == OMPD_distribute) {
6909         CS = Dir->getInnermostCapturedStmt();
6910         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6911           return NumThreads;
6912       }
6913     }
6914     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6915   }
6916   case OMPD_target_teams_distribute:
6917     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6918       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6919       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6920       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6921           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6922       ThreadLimitVal =
6923           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6924     }
6925     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6926   case OMPD_target_parallel:
6927   case OMPD_target_parallel_for:
6928   case OMPD_target_parallel_for_simd:
6929   case OMPD_target_teams_distribute_parallel_for:
6930   case OMPD_target_teams_distribute_parallel_for_simd: {
6931     llvm::Value *CondVal = nullptr;
6932     // Handle if clause. If if clause present, the number of threads is
6933     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6934     if (D.hasClausesOfKind<OMPIfClause>()) {
6935       const OMPIfClause *IfClause = nullptr;
6936       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6937         if (C->getNameModifier() == OMPD_unknown ||
6938             C->getNameModifier() == OMPD_parallel) {
6939           IfClause = C;
6940           break;
6941         }
6942       }
6943       if (IfClause) {
6944         const Expr *Cond = IfClause->getCondition();
6945         bool Result;
6946         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6947           if (!Result)
6948             return Bld.getInt32(1);
6949         } else {
6950           CodeGenFunction::RunCleanupsScope Scope(CGF);
6951           CondVal = CGF.EvaluateExprAsBool(Cond);
6952         }
6953       }
6954     }
6955     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6956       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6957       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6958       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6959           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6960       ThreadLimitVal =
6961           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6962     }
6963     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6964       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6965       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6966       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6967           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6968       NumThreadsVal =
6969           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6970       ThreadLimitVal = ThreadLimitVal
6971                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6972                                                                 ThreadLimitVal),
6973                                               NumThreadsVal, ThreadLimitVal)
6974                            : NumThreadsVal;
6975     }
6976     if (!ThreadLimitVal)
6977       ThreadLimitVal = Bld.getInt32(0);
6978     if (CondVal)
6979       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6980     return ThreadLimitVal;
6981   }
6982   case OMPD_target_teams_distribute_simd:
6983   case OMPD_target_simd:
6984     return Bld.getInt32(1);
6985   case OMPD_parallel:
6986   case OMPD_for:
6987   case OMPD_parallel_for:
6988   case OMPD_parallel_master:
6989   case OMPD_parallel_sections:
6990   case OMPD_for_simd:
6991   case OMPD_parallel_for_simd:
6992   case OMPD_cancel:
6993   case OMPD_cancellation_point:
6994   case OMPD_ordered:
6995   case OMPD_threadprivate:
6996   case OMPD_allocate:
6997   case OMPD_task:
6998   case OMPD_simd:
6999   case OMPD_tile:
7000   case OMPD_sections:
7001   case OMPD_section:
7002   case OMPD_single:
7003   case OMPD_master:
7004   case OMPD_critical:
7005   case OMPD_taskyield:
7006   case OMPD_barrier:
7007   case OMPD_taskwait:
7008   case OMPD_taskgroup:
7009   case OMPD_atomic:
7010   case OMPD_flush:
7011   case OMPD_depobj:
7012   case OMPD_scan:
7013   case OMPD_teams:
7014   case OMPD_target_data:
7015   case OMPD_target_exit_data:
7016   case OMPD_target_enter_data:
7017   case OMPD_distribute:
7018   case OMPD_distribute_simd:
7019   case OMPD_distribute_parallel_for:
7020   case OMPD_distribute_parallel_for_simd:
7021   case OMPD_teams_distribute:
7022   case OMPD_teams_distribute_simd:
7023   case OMPD_teams_distribute_parallel_for:
7024   case OMPD_teams_distribute_parallel_for_simd:
7025   case OMPD_target_update:
7026   case OMPD_declare_simd:
7027   case OMPD_declare_variant:
7028   case OMPD_begin_declare_variant:
7029   case OMPD_end_declare_variant:
7030   case OMPD_declare_target:
7031   case OMPD_end_declare_target:
7032   case OMPD_declare_reduction:
7033   case OMPD_declare_mapper:
7034   case OMPD_taskloop:
7035   case OMPD_taskloop_simd:
7036   case OMPD_master_taskloop:
7037   case OMPD_master_taskloop_simd:
7038   case OMPD_parallel_master_taskloop:
7039   case OMPD_parallel_master_taskloop_simd:
7040   case OMPD_requires:
7041   case OMPD_unknown:
7042     break;
7043   default:
7044     break;
7045   }
7046   llvm_unreachable("Unsupported directive kind.");
7047 }
7048 
7049 namespace {
7050 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7051 
7052 // Utility to handle information from clauses associated with a given
7053 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7054 // It provides a convenient interface to obtain the information and generate
7055 // code for that information.
7056 class MappableExprsHandler {
7057 public:
7058   /// Values for bit flags used to specify the mapping type for
7059   /// offloading.
7060   enum OpenMPOffloadMappingFlags : uint64_t {
7061     /// No flags
7062     OMP_MAP_NONE = 0x0,
7063     /// Allocate memory on the device and move data from host to device.
7064     OMP_MAP_TO = 0x01,
7065     /// Allocate memory on the device and move data from device to host.
7066     OMP_MAP_FROM = 0x02,
7067     /// Always perform the requested mapping action on the element, even
7068     /// if it was already mapped before.
7069     OMP_MAP_ALWAYS = 0x04,
7070     /// Delete the element from the device environment, ignoring the
7071     /// current reference count associated with the element.
7072     OMP_MAP_DELETE = 0x08,
7073     /// The element being mapped is a pointer-pointee pair; both the
7074     /// pointer and the pointee should be mapped.
7075     OMP_MAP_PTR_AND_OBJ = 0x10,
7076     /// This flags signals that the base address of an entry should be
7077     /// passed to the target kernel as an argument.
7078     OMP_MAP_TARGET_PARAM = 0x20,
7079     /// Signal that the runtime library has to return the device pointer
7080     /// in the current position for the data being mapped. Used when we have the
7081     /// use_device_ptr or use_device_addr clause.
7082     OMP_MAP_RETURN_PARAM = 0x40,
7083     /// This flag signals that the reference being passed is a pointer to
7084     /// private data.
7085     OMP_MAP_PRIVATE = 0x80,
7086     /// Pass the element to the device by value.
7087     OMP_MAP_LITERAL = 0x100,
7088     /// Implicit map
7089     OMP_MAP_IMPLICIT = 0x200,
7090     /// Close is a hint to the runtime to allocate memory close to
7091     /// the target device.
7092     OMP_MAP_CLOSE = 0x400,
7093     /// 0x800 is reserved for compatibility with XLC.
7094     /// Produce a runtime error if the data is not already allocated.
7095     OMP_MAP_PRESENT = 0x1000,
7096     /// Signal that the runtime library should use args as an array of
7097     /// descriptor_dim pointers and use args_size as dims. Used when we have
7098     /// non-contiguous list items in target update directive
7099     OMP_MAP_NON_CONTIG = 0x100000000000,
7100     /// The 16 MSBs of the flags indicate whether the entry is member of some
7101     /// struct/class.
7102     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7103     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7104   };
7105 
7106   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7107   static unsigned getFlagMemberOffset() {
7108     unsigned Offset = 0;
7109     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7110          Remain = Remain >> 1)
7111       Offset++;
7112     return Offset;
7113   }
7114 
7115   /// Class that holds debugging information for a data mapping to be passed to
7116   /// the runtime library.
7117   class MappingExprInfo {
7118     /// The variable declaration used for the data mapping.
7119     const ValueDecl *MapDecl = nullptr;
7120     /// The original expression used in the map clause, or null if there is
7121     /// none.
7122     const Expr *MapExpr = nullptr;
7123 
7124   public:
7125     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7126         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7127 
7128     const ValueDecl *getMapDecl() const { return MapDecl; }
7129     const Expr *getMapExpr() const { return MapExpr; }
7130   };
7131 
7132   /// Class that associates information with a base pointer to be passed to the
7133   /// runtime library.
7134   class BasePointerInfo {
7135     /// The base pointer.
7136     llvm::Value *Ptr = nullptr;
7137     /// The base declaration that refers to this device pointer, or null if
7138     /// there is none.
7139     const ValueDecl *DevPtrDecl = nullptr;
7140 
7141   public:
7142     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7143         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7144     llvm::Value *operator*() const { return Ptr; }
7145     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7146     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7147   };
7148 
  // Array types used to collect per-entry mapping information (see the
  // members of MapCombinedInfoTy). Each is a SmallVector with an inline
  // capacity of four elements.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  // An array of value arrays, used for the non-contiguous offsets, counts and
  // strides.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7156 
7157   /// This structure contains combined information generated for mappable
7158   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7159   /// mappers, and non-contiguous information.
7160   struct MapCombinedInfoTy {
7161     struct StructNonContiguousInfo {
7162       bool IsNonContiguous = false;
7163       MapDimArrayTy Dims;
7164       MapNonContiguousArrayTy Offsets;
7165       MapNonContiguousArrayTy Counts;
7166       MapNonContiguousArrayTy Strides;
7167     };
7168     MapExprsArrayTy Exprs;
7169     MapBaseValuesArrayTy BasePointers;
7170     MapValuesArrayTy Pointers;
7171     MapValuesArrayTy Sizes;
7172     MapFlagsArrayTy Types;
7173     MapMappersArrayTy Mappers;
7174     StructNonContiguousInfo NonContigInfo;
7175 
7176     /// Append arrays in \a CurInfo.
7177     void append(MapCombinedInfoTy &CurInfo) {
7178       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7179       BasePointers.append(CurInfo.BasePointers.begin(),
7180                           CurInfo.BasePointers.end());
7181       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7182       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7183       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7184       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7185       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7186                                  CurInfo.NonContigInfo.Dims.end());
7187       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7188                                     CurInfo.NonContigInfo.Offsets.end());
7189       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7190                                    CurInfo.NonContigInfo.Counts.end());
7191       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7192                                     CurInfo.NonContigInfo.Strides.end());
7193     }
7194   };
7195 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // NOTE(review): presumably mapping data gathered before the final range
    // of the struct is known -- confirm against the users of this field.
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped element (field index and address). Starts out as field 0
    // with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element (field index and address). Starts out as field 0
    // with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Both addresses start out invalid.
    // NOTE(review): Base is presumably the address of the struct and LB a
    // lower-bound address -- confirm against the code that fills them in.
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7211 
7212 private:
  /// Information recorded for one list of mappable-expression components: the
  /// component list itself plus the map type, map/motion modifiers and the
  /// flags and declarations stored alongside it.
  ///
  /// The component list and the modifier lists are held as ArrayRef-style
  /// views and are not owned by this struct.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // NOTE(review): presumably requests that the device pointer be returned
    // for this entry -- confirm against the users of this flag.
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    // Null unless a mapper declaration was supplied to the constructor.
    const ValueDecl *Mapper = nullptr;
    // Null unless a variable-reference expression was supplied to the
    // constructor.
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7239 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // NOTE(review): presumably the expression from the clause that names the
    // deferred entry -- confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    // The declaration the deferred entry refers to.
    const ValueDecl *VD = nullptr;
    // NOTE(review): presumably true for use_device_addr and false for
    // use_device_ptr -- confirm against the callers.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7252 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7255   llvm::PointerUnion<const OMPExecutableDirective *,
7256                      const OMPDeclareMapperDecl *>
7257       CurDir;
7258 
7259   /// Function the directive is being generated for.
7260   CodeGenFunction &CGF;
7261 
7262   /// Set of all first private variables in the current directive.
7263   /// bool data is set to true if the variable is implicitly marked as
7264   /// firstprivate, false otherwise.
7265   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7266 
7267   /// Map between device pointer declarations and their expression components.
7268   /// The key value for declarations in 'this' is null.
7269   llvm::DenseMap<
7270       const ValueDecl *,
7271       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7272       DevPointersMap;
7273 
7274   llvm::Value *getExprTypeSize(const Expr *E) const {
7275     QualType ExprTy = E->getType().getCanonicalType();
7276 
7277     // Calculate the size for array shaping expression.
7278     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7279       llvm::Value *Size =
7280           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7281       for (const Expr *SE : OAE->getDimensions()) {
7282         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7283         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7284                                       CGF.getContext().getSizeType(),
7285                                       SE->getExprLoc());
7286         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7287       }
7288       return Size;
7289     }
7290 
7291     // Reference types are ignored for mapping purposes.
7292     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7293       ExprTy = RefTy->getPointeeType().getCanonicalType();
7294 
7295     // Given that an array section is considered a built-in type, we need to
7296     // do the calculation based on the length of the section instead of relying
7297     // on CGF.getTypeSize(E->getType()).
7298     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7299       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7300                             OAE->getBase()->IgnoreParenImpCasts())
7301                             .getCanonicalType();
7302 
7303       // If there is no length associated with the expression and lower bound is
7304       // not specified too, that means we are using the whole length of the
7305       // base.
7306       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7307           !OAE->getLowerBound())
7308         return CGF.getTypeSize(BaseTy);
7309 
7310       llvm::Value *ElemSize;
7311       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7312         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7313       } else {
7314         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7315         assert(ATy && "Expecting array type if not a pointer type.");
7316         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7317       }
7318 
7319       // If we don't have a length at this point, that is because we have an
7320       // array section with a single element.
7321       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7322         return ElemSize;
7323 
7324       if (const Expr *LenExpr = OAE->getLength()) {
7325         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7326         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7327                                              CGF.getContext().getSizeType(),
7328                                              LenExpr->getExprLoc());
7329         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7330       }
7331       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7332              OAE->getLowerBound() && "expected array_section[lb:].");
7333       // Size = sizetype - lb * elemtype;
7334       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7335       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7336       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7337                                        CGF.getContext().getSizeType(),
7338                                        OAE->getLowerBound()->getExprLoc());
7339       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7340       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7341       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7342       LengthVal = CGF.Builder.CreateSelect(
7343           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7344       return LengthVal;
7345     }
7346     return CGF.getTypeSize(ExprTy);
7347   }
7348 
7349   /// Return the corresponding bits for a given map clause modifier. Add
7350   /// a flag marking the map as a pointer if requested. Add a flag marking the
7351   /// map as the first one of a series of maps that relate to the same map
7352   /// expression.
7353   OpenMPOffloadMappingFlags getMapTypeBits(
7354       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7355       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7356       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7357     OpenMPOffloadMappingFlags Bits =
7358         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7359     switch (MapType) {
7360     case OMPC_MAP_alloc:
7361     case OMPC_MAP_release:
7362       // alloc and release is the default behavior in the runtime library,  i.e.
7363       // if we don't pass any bits alloc/release that is what the runtime is
7364       // going to do. Therefore, we don't need to signal anything for these two
7365       // type modifiers.
7366       break;
7367     case OMPC_MAP_to:
7368       Bits |= OMP_MAP_TO;
7369       break;
7370     case OMPC_MAP_from:
7371       Bits |= OMP_MAP_FROM;
7372       break;
7373     case OMPC_MAP_tofrom:
7374       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7375       break;
7376     case OMPC_MAP_delete:
7377       Bits |= OMP_MAP_DELETE;
7378       break;
7379     case OMPC_MAP_unknown:
7380       llvm_unreachable("Unexpected map type!");
7381     }
7382     if (AddPtrFlag)
7383       Bits |= OMP_MAP_PTR_AND_OBJ;
7384     if (AddIsTargetParamFlag)
7385       Bits |= OMP_MAP_TARGET_PARAM;
7386     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7387         != MapModifiers.end())
7388       Bits |= OMP_MAP_ALWAYS;
7389     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7390         != MapModifiers.end())
7391       Bits |= OMP_MAP_CLOSE;
7392     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7393             MapModifiers.end() ||
7394         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7395             MotionModifiers.end())
7396       Bits |= OMP_MAP_PRESENT;
7397     if (IsNonContiguous)
7398       Bits |= OMP_MAP_NON_CONTIG;
7399     return Bits;
7400   }
7401 
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
7404   bool isFinalArraySectionExpression(const Expr *E) const {
7405     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7406 
7407     // It is not an array section and therefore not a unity-size one.
7408     if (!OASE)
7409       return false;
7410 
7411     // An array section with no colon always refer to a single element.
7412     if (OASE->getColonLocFirst().isInvalid())
7413       return false;
7414 
7415     const Expr *Length = OASE->getLength();
7416 
7417     // If we don't have a length we have to check if the array has size 1
7418     // for this dimension. Also, we should always expect a length if the
7419     // base type is pointer.
7420     if (!Length) {
7421       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7422                              OASE->getBase()->IgnoreParenImpCasts())
7423                              .getCanonicalType();
7424       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7425         return ATy->getSize().getSExtValue() != 1;
7426       // If we don't have a constant dimension length, we have to consider
7427       // the current section as having any size, so it is not necessarily
7428       // unitary. If it happen to be unity size, that's user fault.
7429       return true;
7430     }
7431 
7432     // Check if the length evaluates to 1.
7433     Expr::EvalResult Result;
7434     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7435       return true; // Can have more that size 1.
7436 
7437     llvm::APSInt ConstLength = Result.Val.getInt();
7438     return ConstLength.getSExtValue() != 1;
7439   }
7440 
7441   /// Generate the base pointers, section pointers, sizes, map type bits, and
7442   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7443   /// map type, map or motion modifiers, and expression components.
7444   /// \a IsFirstComponent should be set to true if the provided set of
7445   /// components is the first associated with a capture.
7446   void generateInfoForComponentList(
7447       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7448       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7449       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7450       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7451       bool IsFirstComponentList, bool IsImplicit,
7452       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7453       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7454       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7455           OverlappedElements = llvm::None) const {
7456     // The following summarizes what has to be generated for each map and the
7457     // types below. The generated information is expressed in this order:
7458     // base pointer, section pointer, size, flags
7459     // (to add to the ones that come from the map type and modifier).
7460     //
7461     // double d;
7462     // int i[100];
7463     // float *p;
7464     //
7465     // struct S1 {
7466     //   int i;
7467     //   float f[50];
7468     // }
7469     // struct S2 {
7470     //   int i;
7471     //   float f[50];
7472     //   S1 s;
7473     //   double *p;
7474     //   struct S2 *ps;
7475     //   int &ref;
7476     // }
7477     // S2 s;
7478     // S2 *ps;
7479     //
7480     // map(d)
7481     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7482     //
7483     // map(i)
7484     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7485     //
7486     // map(i[1:23])
7487     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7488     //
7489     // map(p)
7490     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7491     //
7492     // map(p[1:24])
7493     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7494     // in unified shared memory mode or for local pointers
7495     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7496     //
7497     // map(s)
7498     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7499     //
7500     // map(s.i)
7501     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7502     //
7503     // map(s.s.f)
7504     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7505     //
7506     // map(s.p)
7507     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7508     //
7509     // map(to: s.p[:22])
7510     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7511     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7512     // &(s.p), &(s.p[0]), 22*sizeof(double),
7513     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7514     // (*) alloc space for struct members, only this is a target parameter
7515     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7516     //      optimizes this entry out, same in the examples below)
7517     // (***) map the pointee (map: to)
7518     //
7519     // map(to: s.ref)
7520     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7521     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7522     // (*) alloc space for struct members, only this is a target parameter
7523     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7524     //      optimizes this entry out, same in the examples below)
7525     // (***) map the pointee (map: to)
7526     //
7527     // map(s.ps)
7528     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7529     //
7530     // map(from: s.ps->s.i)
7531     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7532     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7533     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7534     //
7535     // map(to: s.ps->ps)
7536     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7537     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7538     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7539     //
7540     // map(s.ps->ps->ps)
7541     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7542     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7543     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7544     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7545     //
7546     // map(to: s.ps->ps->s.f[:22])
7547     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7548     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7549     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7550     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7551     //
7552     // map(ps)
7553     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7554     //
7555     // map(ps->i)
7556     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7557     //
7558     // map(ps->s.f)
7559     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7560     //
7561     // map(from: ps->p)
7562     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7563     //
7564     // map(to: ps->p[:22])
7565     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7566     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7567     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7568     //
7569     // map(ps->ps)
7570     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7571     //
7572     // map(from: ps->ps->s.i)
7573     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7574     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7575     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7576     //
7577     // map(from: ps->ps->ps)
7578     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7579     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7580     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7581     //
7582     // map(ps->ps->ps->ps)
7583     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7584     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7585     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7586     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7587     //
7588     // map(to: ps->ps->ps->s.f[:22])
7589     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7590     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7591     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7592     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7593     //
7594     // map(to: s.f[:22]) map(from: s.p[:33])
7595     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7597     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7598     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7599     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7600     // (*) allocate contiguous space needed to fit all mapped members even if
7601     //     we allocate space for members not mapped (in this example,
7602     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7603     //     them as well because they fall between &s.f[0] and &s.p)
7604     //
7605     // map(from: s.f[:22]) map(to: ps->p[:33])
7606     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7607     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7608     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7609     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7610     // (*) the struct this entry pertains to is the 2nd element in the list of
7611     //     arguments, hence MEMBER_OF(2)
7612     //
7613     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7614     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7615     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7616     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7617     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7618     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7619     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7620     // (*) the struct this entry pertains to is the 4th element in the list
7621     //     of arguments, hence MEMBER_OF(4)
7622 
7623     // Track if the map information being generated is the first for a capture.
7624     bool IsCaptureFirstInfo = IsFirstComponentList;
7625     // When the variable is on a declare target link or in a to clause with
7626     // unified memory, a reference is needed to hold the host/device address
7627     // of the variable.
7628     bool RequiresReference = false;
7629 
7630     // Scan the components from the base to the complete expression.
7631     auto CI = Components.rbegin();
7632     auto CE = Components.rend();
7633     auto I = CI;
7634 
7635     // Track if the map information being generated is the first for a list of
7636     // components.
7637     bool IsExpressionFirstInfo = true;
7638     bool FirstPointerInComplexData = false;
7639     Address BP = Address::invalid();
7640     const Expr *AssocExpr = I->getAssociatedExpression();
7641     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7642     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7643     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7644 
7645     if (isa<MemberExpr>(AssocExpr)) {
7646       // The base is the 'this' pointer. The content of the pointer is going
7647       // to be the base of the field being mapped.
7648       BP = CGF.LoadCXXThisAddress();
7649     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7650                (OASE &&
7651                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7652       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7653     } else if (OAShE &&
7654                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7655       BP = Address(
7656           CGF.EmitScalarExpr(OAShE->getBase()),
7657           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7658     } else {
7659       // The base is the reference to the variable.
7660       // BP = &Var.
7661       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7662       if (const auto *VD =
7663               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7664         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7665                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7666           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7667               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7668                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7669             RequiresReference = true;
7670             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7671           }
7672         }
7673       }
7674 
7675       // If the variable is a pointer and is being dereferenced (i.e. is not
7676       // the last component), the base has to be the pointer itself, not its
7677       // reference. References are ignored for mapping purposes.
7678       QualType Ty =
7679           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7680       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7681         // No need to generate individual map information for the pointer, it
7682         // can be associated with the combined storage if shared memory mode is
7683         // active or the base declaration is not global variable.
7684         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7685         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7686             !VD || VD->hasLocalStorage())
7687           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7688         else
7689           FirstPointerInComplexData = true;
7690         ++I;
7691       }
7692     }
7693 
7694     // Track whether a component of the list should be marked as MEMBER_OF some
7695     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7696     // in a component list should be marked as MEMBER_OF, all subsequent entries
7697     // do not belong to the base struct. E.g.
7698     // struct S2 s;
7699     // s.ps->ps->ps->f[:]
7700     //   (1) (2) (3) (4)
7701     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7702     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7703     // is the pointee of ps(2) which is not member of struct s, so it should not
7704     // be marked as such (it is still PTR_AND_OBJ).
7705     // The variable is initialized to false so that PTR_AND_OBJ entries which
7706     // are not struct members are not considered (e.g. array of pointers to
7707     // data).
7708     bool ShouldBeMemberOf = false;
7709 
7710     // Variable keeping track of whether or not we have encountered a component
7711     // in the component list which is a member expression. Useful when we have a
7712     // pointer or a final array section, in which case it is the previous
7713     // component in the list which tells us whether we have a member expression.
7714     // E.g. X.f[:]
7715     // While processing the final array section "[:]" it is "f" which tells us
7716     // whether we are dealing with a member of a declared struct.
7717     const MemberExpr *EncounteredME = nullptr;
7718 
7719     // Track for the total number of dimension. Start from one for the dummy
7720     // dimension.
7721     uint64_t DimSize = 1;
7722 
7723     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7724     bool IsPrevMemberReference = false;
7725 
7726     for (; I != CE; ++I) {
7727       // If the current component is member of a struct (parent struct) mark it.
7728       if (!EncounteredME) {
7729         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7730         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7731         // as MEMBER_OF the parent struct.
7732         if (EncounteredME) {
7733           ShouldBeMemberOf = true;
7734           // Do not emit as complex pointer if this is actually not array-like
7735           // expression.
7736           if (FirstPointerInComplexData) {
7737             QualType Ty = std::prev(I)
7738                               ->getAssociatedDeclaration()
7739                               ->getType()
7740                               .getNonReferenceType();
7741             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7742             FirstPointerInComplexData = false;
7743           }
7744         }
7745       }
7746 
7747       auto Next = std::next(I);
7748 
7749       // We need to generate the addresses and sizes if this is the last
7750       // component, if the component is a pointer or if it is an array section
7751       // whose length can't be proved to be one. If this is a pointer, it
7752       // becomes the base address for the following components.
7753 
7754       // A final array section, is one whose length can't be proved to be one.
7755       // If the map item is non-contiguous then we don't treat any array section
7756       // as final array section.
7757       bool IsFinalArraySection =
7758           !IsNonContiguous &&
7759           isFinalArraySectionExpression(I->getAssociatedExpression());
7760 
7761       // If we have a declaration for the mapping use that, otherwise use
7762       // the base declaration of the map clause.
7763       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7764                                      ? I->getAssociatedDeclaration()
7765                                      : BaseDecl;
7766 
7767       // Get information on whether the element is a pointer. Have to do a
7768       // special treatment for array sections given that they are built-in
7769       // types.
7770       const auto *OASE =
7771           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7772       const auto *OAShE =
7773           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7774       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7775       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7776       bool IsPointer =
7777           OAShE ||
7778           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7779                        .getCanonicalType()
7780                        ->isAnyPointerType()) ||
7781           I->getAssociatedExpression()->getType()->isAnyPointerType();
7782       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7783                                MapDecl &&
7784                                MapDecl->getType()->isLValueReferenceType();
7785       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7786 
7787       if (OASE)
7788         ++DimSize;
7789 
7790       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7791           IsFinalArraySection) {
7792         // If this is not the last component, we expect the pointer to be
7793         // associated with an array expression or member expression.
7794         assert((Next == CE ||
7795                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7796                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7797                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7798                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7799                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7800                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7801                "Unexpected expression");
7802 
7803         Address LB = Address::invalid();
7804         Address LowestElem = Address::invalid();
7805         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7806                                        const MemberExpr *E) {
7807           const Expr *BaseExpr = E->getBase();
7808           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7809           // scalar.
7810           LValue BaseLV;
7811           if (E->isArrow()) {
7812             LValueBaseInfo BaseInfo;
7813             TBAAAccessInfo TBAAInfo;
7814             Address Addr =
7815                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7816             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7817             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7818           } else {
7819             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7820           }
7821           return BaseLV;
7822         };
7823         if (OAShE) {
7824           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7825                                     CGF.getContext().getTypeAlignInChars(
7826                                         OAShE->getBase()->getType()));
7827         } else if (IsMemberReference) {
7828           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7829           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7830           LowestElem = CGF.EmitLValueForFieldInitialization(
7831                               BaseLVal, cast<FieldDecl>(MapDecl))
7832                            .getAddress(CGF);
7833           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7834                    .getAddress(CGF);
7835         } else {
7836           LowestElem = LB =
7837               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7838                   .getAddress(CGF);
7839         }
7840 
7841         // If this component is a pointer inside the base struct then we don't
7842         // need to create any entry for it - it will be combined with the object
7843         // it is pointing to into a single PTR_AND_OBJ entry.
7844         bool IsMemberPointerOrAddr =
7845             EncounteredME &&
7846             (((IsPointer || ForDeviceAddr) &&
7847               I->getAssociatedExpression() == EncounteredME) ||
7848              (IsPrevMemberReference && !IsPointer) ||
7849              (IsMemberReference && Next != CE &&
7850               !Next->getAssociatedExpression()->getType()->isPointerType()));
7851         if (!OverlappedElements.empty() && Next == CE) {
7852           // Handle base element with the info for overlapped elements.
7853           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7854           assert(!IsPointer &&
7855                  "Unexpected base element with the pointer type.");
7856           // Mark the whole struct as the struct that requires allocation on the
7857           // device.
7858           PartialStruct.LowestElem = {0, LowestElem};
7859           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7860               I->getAssociatedExpression()->getType());
7861           Address HB = CGF.Builder.CreateConstGEP(
7862               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7863                                                               CGF.VoidPtrTy),
7864               TypeSize.getQuantity() - 1);
7865           PartialStruct.HighestElem = {
7866               std::numeric_limits<decltype(
7867                   PartialStruct.HighestElem.first)>::max(),
7868               HB};
7869           PartialStruct.Base = BP;
7870           PartialStruct.LB = LB;
7871           assert(
7872               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7873               "Overlapped elements must be used only once for the variable.");
7874           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7875           // Emit data for non-overlapped data.
7876           OpenMPOffloadMappingFlags Flags =
7877               OMP_MAP_MEMBER_OF |
7878               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7879                              /*AddPtrFlag=*/false,
7880                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7881           llvm::Value *Size = nullptr;
7882           // Do bitcopy of all non-overlapped structure elements.
7883           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7884                    Component : OverlappedElements) {
7885             Address ComponentLB = Address::invalid();
7886             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7887                  Component) {
7888               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7889                 const auto *FD = dyn_cast<FieldDecl>(VD);
7890                 if (FD && FD->getType()->isLValueReferenceType()) {
7891                   const auto *ME =
7892                       cast<MemberExpr>(MC.getAssociatedExpression());
7893                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7894                   ComponentLB =
7895                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7896                           .getAddress(CGF);
7897                 } else {
7898                   ComponentLB =
7899                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7900                           .getAddress(CGF);
7901                 }
7902                 Size = CGF.Builder.CreatePtrDiff(
7903                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7904                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7905                 break;
7906               }
7907             }
7908             assert(Size && "Failed to determine structure size");
7909             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7910             CombinedInfo.BasePointers.push_back(BP.getPointer());
7911             CombinedInfo.Pointers.push_back(LB.getPointer());
7912             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7913                 Size, CGF.Int64Ty, /*isSigned=*/true));
7914             CombinedInfo.Types.push_back(Flags);
7915             CombinedInfo.Mappers.push_back(nullptr);
7916             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7917                                                                       : 1);
7918             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7919           }
7920           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7921           CombinedInfo.BasePointers.push_back(BP.getPointer());
7922           CombinedInfo.Pointers.push_back(LB.getPointer());
7923           Size = CGF.Builder.CreatePtrDiff(
7924               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7925               CGF.EmitCastToVoidPtr(LB.getPointer()));
7926           CombinedInfo.Sizes.push_back(
7927               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7928           CombinedInfo.Types.push_back(Flags);
7929           CombinedInfo.Mappers.push_back(nullptr);
7930           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7931                                                                     : 1);
7932           break;
7933         }
7934         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7935         if (!IsMemberPointerOrAddr ||
7936             (Next == CE && MapType != OMPC_MAP_unknown)) {
7937           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7938           CombinedInfo.BasePointers.push_back(BP.getPointer());
7939           CombinedInfo.Pointers.push_back(LB.getPointer());
7940           CombinedInfo.Sizes.push_back(
7941               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7942           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7943                                                                     : 1);
7944 
7945           // If Mapper is valid, the last component inherits the mapper.
7946           bool HasMapper = Mapper && Next == CE;
7947           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7948 
7949           // We need to add a pointer flag for each map that comes from the
7950           // same expression except for the first one. We also need to signal
7951           // this map is the first one that relates with the current capture
7952           // (there is a set of entries for each capture).
7953           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7954               MapType, MapModifiers, MotionModifiers, IsImplicit,
7955               !IsExpressionFirstInfo || RequiresReference ||
7956                   FirstPointerInComplexData || IsMemberReference,
7957               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7958 
7959           if (!IsExpressionFirstInfo || IsMemberReference) {
7960             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7961             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7962             if (IsPointer || (IsMemberReference && Next != CE))
7963               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7964                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7965 
7966             if (ShouldBeMemberOf) {
7967               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7968               // should be later updated with the correct value of MEMBER_OF.
7969               Flags |= OMP_MAP_MEMBER_OF;
7970               // From now on, all subsequent PTR_AND_OBJ entries should not be
7971               // marked as MEMBER_OF.
7972               ShouldBeMemberOf = false;
7973             }
7974           }
7975 
7976           CombinedInfo.Types.push_back(Flags);
7977         }
7978 
7979         // If we have encountered a member expression so far, keep track of the
7980         // mapped member. If the parent is "*this", then the value declaration
7981         // is nullptr.
7982         if (EncounteredME) {
7983           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7984           unsigned FieldIndex = FD->getFieldIndex();
7985 
7986           // Update info about the lowest and highest elements for this struct
7987           if (!PartialStruct.Base.isValid()) {
7988             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7989             if (IsFinalArraySection) {
7990               Address HB =
7991                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7992                       .getAddress(CGF);
7993               PartialStruct.HighestElem = {FieldIndex, HB};
7994             } else {
7995               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7996             }
7997             PartialStruct.Base = BP;
7998             PartialStruct.LB = BP;
7999           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8000             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8001           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8002             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8003           }
8004         }
8005 
8006         // Need to emit combined struct for array sections.
8007         if (IsFinalArraySection || IsNonContiguous)
8008           PartialStruct.IsArraySection = true;
8009 
8010         // If we have a final array section, we are done with this expression.
8011         if (IsFinalArraySection)
8012           break;
8013 
8014         // The pointer becomes the base for the next element.
8015         if (Next != CE)
8016           BP = IsMemberReference ? LowestElem : LB;
8017 
8018         IsExpressionFirstInfo = false;
8019         IsCaptureFirstInfo = false;
8020         FirstPointerInComplexData = false;
8021         IsPrevMemberReference = IsMemberReference;
8022       } else if (FirstPointerInComplexData) {
8023         QualType Ty = Components.rbegin()
8024                           ->getAssociatedDeclaration()
8025                           ->getType()
8026                           .getNonReferenceType();
8027         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8028         FirstPointerInComplexData = false;
8029       }
8030     }
8031     // If ran into the whole component - allocate the space for the whole
8032     // record.
8033     if (!EncounteredME)
8034       PartialStruct.HasCompleteRecord = true;
8035 
8036     if (!IsNonContiguous)
8037       return;
8038 
8039     const ASTContext &Context = CGF.getContext();
8040 
8041     // For supporting stride in array section, we need to initialize the first
8042     // dimension size as 1, first offset as 0, and first count as 1
8043     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8044     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8045     MapValuesArrayTy CurStrides;
8046     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8047     uint64_t ElementTypeSize;
8048 
8049     // Collect Size information for each dimension and get the element size as
8050     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8052     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8053          Components) {
8054       const Expr *AssocExpr = Component.getAssociatedExpression();
8055       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8056 
8057       if (!OASE)
8058         continue;
8059 
8060       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8061       auto *CAT = Context.getAsConstantArrayType(Ty);
8062       auto *VAT = Context.getAsVariableArrayType(Ty);
8063 
8064       // We need all the dimension size except for the last dimension.
8065       assert((VAT || CAT || &Component == &*Components.begin()) &&
8066              "Should be either ConstantArray or VariableArray if not the "
8067              "first Component");
8068 
8069       // Get element size if CurStrides is empty.
8070       if (CurStrides.empty()) {
8071         const Type *ElementType = nullptr;
8072         if (CAT)
8073           ElementType = CAT->getElementType().getTypePtr();
8074         else if (VAT)
8075           ElementType = VAT->getElementType().getTypePtr();
8076         else
8077           assert(&Component == &*Components.begin() &&
8078                  "Only expect pointer (non CAT or VAT) when this is the "
8079                  "first Component");
8080         // If ElementType is null, then it means the base is a pointer
8081         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8082         // for next iteration.
8083         if (ElementType) {
8084           // For the case that having pointer as base, we need to remove one
8085           // level of indirection.
8086           if (&Component != &*Components.begin())
8087             ElementType = ElementType->getPointeeOrArrayElementType();
8088           ElementTypeSize =
8089               Context.getTypeSizeInChars(ElementType).getQuantity();
8090           CurStrides.push_back(
8091               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8092         }
8093       }
8094       // Get dimension value except for the last dimension since we don't need
8095       // it.
8096       if (DimSizes.size() < Components.size() - 1) {
8097         if (CAT)
8098           DimSizes.push_back(llvm::ConstantInt::get(
8099               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8100         else if (VAT)
8101           DimSizes.push_back(CGF.Builder.CreateIntCast(
8102               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8103               /*IsSigned=*/false));
8104       }
8105     }
8106 
    // Skip the dummy dimension since we already have its information.
8108     auto DI = DimSizes.begin() + 1;
8109     // Product of dimension.
8110     llvm::Value *DimProd =
8111         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8112 
8113     // Collect info for non-contiguous. Notice that offset, count, and stride
8114     // are only meaningful for array-section, so we insert a null for anything
8115     // other than array-section.
8116     // Also, the size of offset, count, and stride are not the same as
8117     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8118     // count, and stride are the same as the number of non-contiguous
8119     // declaration in target update to/from clause.
8120     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8121          Components) {
8122       const Expr *AssocExpr = Component.getAssociatedExpression();
8123 
8124       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8125         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8126             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8127             /*isSigned=*/false);
8128         CurOffsets.push_back(Offset);
8129         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8130         CurStrides.push_back(CurStrides.back());
8131         continue;
8132       }
8133 
8134       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8135 
8136       if (!OASE)
8137         continue;
8138 
8139       // Offset
8140       const Expr *OffsetExpr = OASE->getLowerBound();
8141       llvm::Value *Offset = nullptr;
8142       if (!OffsetExpr) {
8143         // If offset is absent, then we just set it to zero.
8144         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8145       } else {
8146         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8147                                            CGF.Int64Ty,
8148                                            /*isSigned=*/false);
8149       }
8150       CurOffsets.push_back(Offset);
8151 
8152       // Count
8153       const Expr *CountExpr = OASE->getLength();
8154       llvm::Value *Count = nullptr;
8155       if (!CountExpr) {
8156         // In Clang, once a high dimension is an array section, we construct all
8157         // the lower dimension as array section, however, for case like
8158         // arr[0:2][2], Clang construct the inner dimension as an array section
8159         // but it actually is not in an array section form according to spec.
8160         if (!OASE->getColonLocFirst().isValid() &&
8161             !OASE->getColonLocSecond().isValid()) {
8162           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8163         } else {
8164           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8165           // When the length is absent it defaults to ⌈(size −
8166           // lower-bound)/stride⌉, where size is the size of the array
8167           // dimension.
8168           const Expr *StrideExpr = OASE->getStride();
8169           llvm::Value *Stride =
8170               StrideExpr
8171                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8172                                               CGF.Int64Ty, /*isSigned=*/false)
8173                   : nullptr;
8174           if (Stride)
8175             Count = CGF.Builder.CreateUDiv(
8176                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8177           else
8178             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8179         }
8180       } else {
8181         Count = CGF.EmitScalarExpr(CountExpr);
8182       }
8183       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8184       CurCounts.push_back(Count);
8185 
8186       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8187       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8188       //              Offset      Count     Stride
8189       //    D0          0           1         4    (int)    <- dummy dimension
8190       //    D1          0           2         8    (2 * (1) * 4)
8191       //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
8193       const Expr *StrideExpr = OASE->getStride();
8194       llvm::Value *Stride =
8195           StrideExpr
8196               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8197                                           CGF.Int64Ty, /*isSigned=*/false)
8198               : nullptr;
8199       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8200       if (Stride)
8201         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8202       else
8203         CurStrides.push_back(DimProd);
8204       if (DI != DimSizes.end())
8205         ++DI;
8206     }
8207 
8208     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8209     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8210     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8211   }
8212 
8213   /// Return the adjusted map modifiers if the declaration a capture refers to
8214   /// appears in a first-private clause. This is expected to be used only with
8215   /// directives that start with 'target'.
8216   MappableExprsHandler::OpenMPOffloadMappingFlags
8217   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8218     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8219 
8220     // A first private variable captured by reference will use only the
8221     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8222     // declaration is known as first-private in this handler.
8223     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8224       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8225           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8226         return MappableExprsHandler::OMP_MAP_ALWAYS |
8227                MappableExprsHandler::OMP_MAP_TO;
8228       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8229         return MappableExprsHandler::OMP_MAP_TO |
8230                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8231       return MappableExprsHandler::OMP_MAP_PRIVATE |
8232              MappableExprsHandler::OMP_MAP_TO;
8233     }
8234     return MappableExprsHandler::OMP_MAP_TO |
8235            MappableExprsHandler::OMP_MAP_FROM;
8236   }
8237 
8238   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8239     // Rotate by getFlagMemberOffset() bits.
8240     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8241                                                   << getFlagMemberOffset());
8242   }
8243 
8244   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8245                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8246     // If the entry is PTR_AND_OBJ but has not been marked with the special
8247     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8248     // marked as MEMBER_OF.
8249     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8250         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8251       return;
8252 
8253     // Reset the placeholder value to prepare the flag for the assignment of the
8254     // proper MEMBER_OF value.
8255     Flags &= ~OMP_MAP_MEMBER_OF;
8256     Flags |= MemberOfFlag;
8257   }
8258 
8259   void getPlainLayout(const CXXRecordDecl *RD,
8260                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8261                       bool AsBase) const {
8262     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8263 
8264     llvm::StructType *St =
8265         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8266 
8267     unsigned NumElements = St->getNumElements();
8268     llvm::SmallVector<
8269         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8270         RecordLayout(NumElements);
8271 
8272     // Fill bases.
8273     for (const auto &I : RD->bases()) {
8274       if (I.isVirtual())
8275         continue;
8276       const auto *Base = I.getType()->getAsCXXRecordDecl();
8277       // Ignore empty bases.
8278       if (Base->isEmpty() || CGF.getContext()
8279                                  .getASTRecordLayout(Base)
8280                                  .getNonVirtualSize()
8281                                  .isZero())
8282         continue;
8283 
8284       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8285       RecordLayout[FieldIndex] = Base;
8286     }
8287     // Fill in virtual bases.
8288     for (const auto &I : RD->vbases()) {
8289       const auto *Base = I.getType()->getAsCXXRecordDecl();
8290       // Ignore empty bases.
8291       if (Base->isEmpty())
8292         continue;
8293       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8294       if (RecordLayout[FieldIndex])
8295         continue;
8296       RecordLayout[FieldIndex] = Base;
8297     }
8298     // Fill in all the fields.
8299     assert(!RD->isUnion() && "Unexpected union.");
8300     for (const auto *Field : RD->fields()) {
8301       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8302       // will fill in later.)
8303       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8304         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8305         RecordLayout[FieldIndex] = Field;
8306       }
8307     }
8308     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8309              &Data : RecordLayout) {
8310       if (Data.isNull())
8311         continue;
8312       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8313         getPlainLayout(Base, Layout, /*AsBase=*/true);
8314       else
8315         Layout.push_back(Data.get<const FieldDecl *>());
8316     }
8317   }
8318 
8319   /// Generate all the base pointers, section pointers, sizes, map types, and
8320   /// mappers for the extracted mappable expressions (all included in \a
8321   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8322   /// pair of the relevant declaration and index where it occurs is appended to
8323   /// the device pointers info array.
8324   void generateAllInfoForClauses(
8325       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8326       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8327           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8328     // We have to process the component lists that relate with the same
8329     // declaration in a single chunk so that we can generate the map flags
8330     // correctly. Therefore, we organize all lists in a map.
8331     enum MapKind { Present, Allocs, Other, Total };
8332     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8333                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8334         Info;
8335 
8336     // Helper function to fill the information map for the different supported
8337     // clauses.
8338     auto &&InfoGen =
8339         [&Info, &SkipVarSet](
8340             const ValueDecl *D, MapKind Kind,
8341             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8342             OpenMPMapClauseKind MapType,
8343             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8344             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8345             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8346             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8347           if (SkipVarSet.contains(D))
8348             return;
8349           auto It = Info.find(D);
8350           if (It == Info.end())
8351             It = Info
8352                      .insert(std::make_pair(
8353                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8354                      .first;
8355           It->second[Kind].emplace_back(
8356               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8357               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8358         };
8359 
8360     for (const auto *Cl : Clauses) {
8361       const auto *C = dyn_cast<OMPMapClause>(Cl);
8362       if (!C)
8363         continue;
8364       MapKind Kind = Other;
8365       if (!C->getMapTypeModifiers().empty() &&
8366           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8367             return K == OMPC_MAP_MODIFIER_present;
8368           }))
8369         Kind = Present;
8370       else if (C->getMapType() == OMPC_MAP_alloc)
8371         Kind = Allocs;
8372       const auto *EI = C->getVarRefs().begin();
8373       for (const auto L : C->component_lists()) {
8374         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8375         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8376                 C->getMapTypeModifiers(), llvm::None,
8377                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8378                 E);
8379         ++EI;
8380       }
8381     }
8382     for (const auto *Cl : Clauses) {
8383       const auto *C = dyn_cast<OMPToClause>(Cl);
8384       if (!C)
8385         continue;
8386       MapKind Kind = Other;
8387       if (!C->getMotionModifiers().empty() &&
8388           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8389             return K == OMPC_MOTION_MODIFIER_present;
8390           }))
8391         Kind = Present;
8392       const auto *EI = C->getVarRefs().begin();
8393       for (const auto L : C->component_lists()) {
8394         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8395                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8396                 C->isImplicit(), std::get<2>(L), *EI);
8397         ++EI;
8398       }
8399     }
8400     for (const auto *Cl : Clauses) {
8401       const auto *C = dyn_cast<OMPFromClause>(Cl);
8402       if (!C)
8403         continue;
8404       MapKind Kind = Other;
8405       if (!C->getMotionModifiers().empty() &&
8406           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8407             return K == OMPC_MOTION_MODIFIER_present;
8408           }))
8409         Kind = Present;
8410       const auto *EI = C->getVarRefs().begin();
8411       for (const auto L : C->component_lists()) {
8412         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8413                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8414                 C->isImplicit(), std::get<2>(L), *EI);
8415         ++EI;
8416       }
8417     }
8418 
8419     // Look at the use_device_ptr clause information and mark the existing map
8420     // entries as such. If there is no map information for an entry in the
8421     // use_device_ptr list, we create one with map type 'alloc' and zero size
8422     // section. It is the user fault if that was not mapped before. If there is
8423     // no map information and the pointer is a struct member, then we defer the
8424     // emission of that entry until the whole struct has been processed.
8425     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8426                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8427         DeferredInfo;
8428     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8429 
8430     for (const auto *Cl : Clauses) {
8431       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8432       if (!C)
8433         continue;
8434       for (const auto L : C->component_lists()) {
8435         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8436             std::get<1>(L);
8437         assert(!Components.empty() &&
8438                "Not expecting empty list of components!");
8439         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8440         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8441         const Expr *IE = Components.back().getAssociatedExpression();
8442         // If the first component is a member expression, we have to look into
8443         // 'this', which maps to null in the map of map information. Otherwise
8444         // look directly for the information.
8445         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8446 
8447         // We potentially have map information for this declaration already.
8448         // Look for the first set of components that refer to it.
8449         if (It != Info.end()) {
8450           bool Found = false;
8451           for (auto &Data : It->second) {
8452             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8453               return MI.Components.back().getAssociatedDeclaration() == VD;
8454             });
8455             // If we found a map entry, signal that the pointer has to be
8456             // returned and move on to the next declaration. Exclude cases where
8457             // the base pointer is mapped as array subscript, array section or
8458             // array shaping. The base address is passed as a pointer to base in
8459             // this case and cannot be used as a base for use_device_ptr list
8460             // item.
8461             if (CI != Data.end()) {
8462               auto PrevCI = std::next(CI->Components.rbegin());
8463               const auto *VarD = dyn_cast<VarDecl>(VD);
8464               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8465                   isa<MemberExpr>(IE) ||
8466                   !VD->getType().getNonReferenceType()->isPointerType() ||
8467                   PrevCI == CI->Components.rend() ||
8468                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8469                   VarD->hasLocalStorage()) {
8470                 CI->ReturnDevicePointer = true;
8471                 Found = true;
8472                 break;
8473               }
8474             }
8475           }
8476           if (Found)
8477             continue;
8478         }
8479 
8480         // We didn't find any match in our map information - generate a zero
8481         // size array section - if the pointer is a struct member we defer this
8482         // action until the whole struct has been processed.
8483         if (isa<MemberExpr>(IE)) {
8484           // Insert the pointer into Info to be processed by
8485           // generateInfoForComponentList. Because it is a member pointer
8486           // without a pointee, no entry will be generated for it, therefore
8487           // we need to generate one after the whole struct has been processed.
8488           // Nonetheless, generateInfoForComponentList must be called to take
8489           // the pointer into account for the calculation of the range of the
8490           // partial struct.
8491           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8492                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8493                   nullptr);
8494           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8495         } else {
8496           llvm::Value *Ptr =
8497               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8498           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8499           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8500           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8501           UseDevicePtrCombinedInfo.Sizes.push_back(
8502               llvm::Constant::getNullValue(CGF.Int64Ty));
8503           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8504           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8505         }
8506       }
8507     }
8508 
8509     // Look at the use_device_addr clause information and mark the existing map
8510     // entries as such. If there is no map information for an entry in the
8511     // use_device_addr list, we create one with map type 'alloc' and zero size
8512     // section. It is the user fault if that was not mapped before. If there is
8513     // no map information and the pointer is a struct member, then we defer the
8514     // emission of that entry until the whole struct has been processed.
8515     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8516     for (const auto *Cl : Clauses) {
8517       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8518       if (!C)
8519         continue;
8520       for (const auto L : C->component_lists()) {
8521         assert(!std::get<1>(L).empty() &&
8522                "Not expecting empty list of components!");
8523         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8524         if (!Processed.insert(VD).second)
8525           continue;
8526         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8527         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8528         // If the first component is a member expression, we have to look into
8529         // 'this', which maps to null in the map of map information. Otherwise
8530         // look directly for the information.
8531         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8532 
8533         // We potentially have map information for this declaration already.
8534         // Look for the first set of components that refer to it.
8535         if (It != Info.end()) {
8536           bool Found = false;
8537           for (auto &Data : It->second) {
8538             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8539               return MI.Components.back().getAssociatedDeclaration() == VD;
8540             });
8541             // If we found a map entry, signal that the pointer has to be
8542             // returned and move on to the next declaration.
8543             if (CI != Data.end()) {
8544               CI->ReturnDevicePointer = true;
8545               Found = true;
8546               break;
8547             }
8548           }
8549           if (Found)
8550             continue;
8551         }
8552 
8553         // We didn't find any match in our map information - generate a zero
8554         // size array section - if the pointer is a struct member we defer this
8555         // action until the whole struct has been processed.
8556         if (isa<MemberExpr>(IE)) {
8557           // Insert the pointer into Info to be processed by
8558           // generateInfoForComponentList. Because it is a member pointer
8559           // without a pointee, no entry will be generated for it, therefore
8560           // we need to generate one after the whole struct has been processed.
8561           // Nonetheless, generateInfoForComponentList must be called to take
8562           // the pointer into account for the calculation of the range of the
8563           // partial struct.
8564           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8565                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8566                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8567           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8568         } else {
8569           llvm::Value *Ptr;
8570           if (IE->isGLValue())
8571             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8572           else
8573             Ptr = CGF.EmitScalarExpr(IE);
8574           CombinedInfo.Exprs.push_back(VD);
8575           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8576           CombinedInfo.Pointers.push_back(Ptr);
8577           CombinedInfo.Sizes.push_back(
8578               llvm::Constant::getNullValue(CGF.Int64Ty));
8579           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8580           CombinedInfo.Mappers.push_back(nullptr);
8581         }
8582       }
8583     }
8584 
8585     for (const auto &Data : Info) {
8586       StructRangeInfoTy PartialStruct;
8587       // Temporary generated information.
8588       MapCombinedInfoTy CurInfo;
8589       const Decl *D = Data.first;
8590       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8591       for (const auto &M : Data.second) {
8592         for (const MapInfo &L : M) {
8593           assert(!L.Components.empty() &&
8594                  "Not expecting declaration with no component lists.");
8595 
8596           // Remember the current base pointer index.
8597           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8598           CurInfo.NonContigInfo.IsNonContiguous =
8599               L.Components.back().isNonContiguous();
8600           generateInfoForComponentList(
8601               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8602               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8603               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8604 
8605           // If this entry relates with a device pointer, set the relevant
8606           // declaration and add the 'return pointer' flag.
8607           if (L.ReturnDevicePointer) {
8608             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8609                    "Unexpected number of mapped base pointers.");
8610 
8611             const ValueDecl *RelevantVD =
8612                 L.Components.back().getAssociatedDeclaration();
8613             assert(RelevantVD &&
8614                    "No relevant declaration related with device pointer??");
8615 
8616             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8617                 RelevantVD);
8618             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8619           }
8620         }
8621       }
8622 
8623       // Append any pending zero-length pointers which are struct members and
8624       // used with use_device_ptr or use_device_addr.
8625       auto CI = DeferredInfo.find(Data.first);
8626       if (CI != DeferredInfo.end()) {
8627         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8628           llvm::Value *BasePtr;
8629           llvm::Value *Ptr;
8630           if (L.ForDeviceAddr) {
8631             if (L.IE->isGLValue())
8632               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8633             else
8634               Ptr = this->CGF.EmitScalarExpr(L.IE);
8635             BasePtr = Ptr;
8636             // Entry is RETURN_PARAM. Also, set the placeholder value
8637             // MEMBER_OF=FFFF so that the entry is later updated with the
8638             // correct value of MEMBER_OF.
8639             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8640           } else {
8641             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8642             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8643                                              L.IE->getExprLoc());
8644             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8645             // placeholder value MEMBER_OF=FFFF so that the entry is later
8646             // updated with the correct value of MEMBER_OF.
8647             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8648                                     OMP_MAP_MEMBER_OF);
8649           }
8650           CurInfo.Exprs.push_back(L.VD);
8651           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8652           CurInfo.Pointers.push_back(Ptr);
8653           CurInfo.Sizes.push_back(
8654               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8655           CurInfo.Mappers.push_back(nullptr);
8656         }
8657       }
8658       // If there is an entry in PartialStruct it means we have a struct with
8659       // individual members mapped. Emit an extra combined entry.
8660       if (PartialStruct.Base.isValid()) {
8661         CurInfo.NonContigInfo.Dims.push_back(0);
8662         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8663       }
8664 
8665       // We need to append the results of this capture to what we already
8666       // have.
8667       CombinedInfo.append(CurInfo);
8668     }
8669     // Append data for use_device_ptr clauses.
8670     CombinedInfo.append(UseDevicePtrCombinedInfo);
8671   }
8672 
8673 public:
8674   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8675       : CurDir(&Dir), CGF(CGF) {
8676     // Extract firstprivate clause information.
8677     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8678       for (const auto *D : C->varlists())
8679         FirstPrivateDecls.try_emplace(
8680             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8681     // Extract implicit firstprivates from uses_allocators clauses.
8682     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8683       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8684         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8685         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8686           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8687                                         /*Implicit=*/true);
8688         else if (const auto *VD = dyn_cast<VarDecl>(
8689                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8690                          ->getDecl()))
8691           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8692       }
8693     }
8694     // Extract device pointer clause information.
8695     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8696       for (auto L : C->component_lists())
8697         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8698   }
8699 
  /// Constructor for the declare mapper directive. Only stores the directive
  /// and the code generation function; unlike the executable-directive
  /// constructor it extracts no clause information.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8703 
8704   /// Generate code for the combined entry if we have a partially mapped struct
8705   /// and take care of the mapping flags of the arguments corresponding to
8706   /// individual struct members.
8707   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8708                          MapFlagsArrayTy &CurTypes,
8709                          const StructRangeInfoTy &PartialStruct,
8710                          const ValueDecl *VD = nullptr,
8711                          bool NotTargetParams = true) const {
8712     if (CurTypes.size() == 1 &&
8713         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8714         !PartialStruct.IsArraySection)
8715       return;
8716     Address LBAddr = PartialStruct.LowestElem.second;
8717     Address HBAddr = PartialStruct.HighestElem.second;
8718     if (PartialStruct.HasCompleteRecord) {
8719       LBAddr = PartialStruct.LB;
8720       HBAddr = PartialStruct.LB;
8721     }
8722     CombinedInfo.Exprs.push_back(VD);
8723     // Base is the base of the struct
8724     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8725     // Pointer is the address of the lowest element
8726     llvm::Value *LB = LBAddr.getPointer();
8727     CombinedInfo.Pointers.push_back(LB);
8728     // There should not be a mapper for a combined entry.
8729     CombinedInfo.Mappers.push_back(nullptr);
8730     // Size is (addr of {highest+1} element) - (addr of lowest element)
8731     llvm::Value *HB = HBAddr.getPointer();
8732     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8733     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8734     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8735     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8736     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8737                                                   /*isSigned=*/false);
8738     CombinedInfo.Sizes.push_back(Size);
8739     // Map type is always TARGET_PARAM, if generate info for captures.
8740     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8741                                                  : OMP_MAP_TARGET_PARAM);
8742     // If any element has the present modifier, then make sure the runtime
8743     // doesn't attempt to allocate the struct.
8744     if (CurTypes.end() !=
8745         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8746           return Type & OMP_MAP_PRESENT;
8747         }))
8748       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8749     // Remove TARGET_PARAM flag from the first element
8750     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8751 
8752     // All other current entries will be MEMBER_OF the combined entry
8753     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8754     // 0xFFFF in the MEMBER_OF field).
8755     OpenMPOffloadMappingFlags MemberOfFlag =
8756         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8757     for (auto &M : CurTypes)
8758       setCorrectMemberOfFlag(M, MemberOfFlag);
8759   }
8760 
8761   /// Generate all the base pointers, section pointers, sizes, map types, and
8762   /// mappers for the extracted mappable expressions (all included in \a
8763   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8764   /// pair of the relevant declaration and index where it occurs is appended to
8765   /// the device pointers info array.
8766   void generateAllInfo(
8767       MapCombinedInfoTy &CombinedInfo,
8768       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8769           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8770     assert(CurDir.is<const OMPExecutableDirective *>() &&
8771            "Expect a executable directive");
8772     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8773     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8774   }
8775 
8776   /// Generate all the base pointers, section pointers, sizes, map types, and
8777   /// mappers for the extracted map clauses of user-defined mapper (all included
8778   /// in \a CombinedInfo).
8779   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8780     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8781            "Expect a declare mapper directive");
8782     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8783     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8784   }
8785 
8786   /// Emit capture info for lambdas for variables captured by reference.
8787   void generateInfoForLambdaCaptures(
8788       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8789       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8790     const auto *RD = VD->getType()
8791                          .getCanonicalType()
8792                          .getNonReferenceType()
8793                          ->getAsCXXRecordDecl();
8794     if (!RD || !RD->isLambda())
8795       return;
8796     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8797     LValue VDLVal = CGF.MakeAddrLValue(
8798         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8799     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8800     FieldDecl *ThisCapture = nullptr;
8801     RD->getCaptureFields(Captures, ThisCapture);
8802     if (ThisCapture) {
8803       LValue ThisLVal =
8804           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8805       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8806       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8807                                  VDLVal.getPointer(CGF));
8808       CombinedInfo.Exprs.push_back(VD);
8809       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8810       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8811       CombinedInfo.Sizes.push_back(
8812           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8813                                     CGF.Int64Ty, /*isSigned=*/true));
8814       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8815                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8816       CombinedInfo.Mappers.push_back(nullptr);
8817     }
8818     for (const LambdaCapture &LC : RD->captures()) {
8819       if (!LC.capturesVariable())
8820         continue;
8821       const VarDecl *VD = LC.getCapturedVar();
8822       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8823         continue;
8824       auto It = Captures.find(VD);
8825       assert(It != Captures.end() && "Found lambda capture without field.");
8826       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8827       if (LC.getCaptureKind() == LCK_ByRef) {
8828         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8829         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8830                                    VDLVal.getPointer(CGF));
8831         CombinedInfo.Exprs.push_back(VD);
8832         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8833         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8834         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8835             CGF.getTypeSize(
8836                 VD->getType().getCanonicalType().getNonReferenceType()),
8837             CGF.Int64Ty, /*isSigned=*/true));
8838       } else {
8839         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8840         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8841                                    VDLVal.getPointer(CGF));
8842         CombinedInfo.Exprs.push_back(VD);
8843         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8844         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8845         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8846       }
8847       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8848                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8849       CombinedInfo.Mappers.push_back(nullptr);
8850     }
8851   }
8852 
8853   /// Set correct indices for lambdas captures.
8854   void adjustMemberOfForLambdaCaptures(
8855       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8856       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8857       MapFlagsArrayTy &Types) const {
8858     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8859       // Set correct member_of idx for all implicit lambda captures.
8860       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8861                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8862         continue;
8863       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8864       assert(BasePtr && "Unable to find base lambda address.");
8865       int TgtIdx = -1;
8866       for (unsigned J = I; J > 0; --J) {
8867         unsigned Idx = J - 1;
8868         if (Pointers[Idx] != BasePtr)
8869           continue;
8870         TgtIdx = Idx;
8871         break;
8872       }
8873       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8874       // All other current entries will be MEMBER_OF the combined entry
8875       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8876       // 0xFFFF in the MEMBER_OF field).
8877       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8878       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8879     }
8880   }
8881 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// If the captured declaration appears in an is_device_ptr clause, a single
  /// pointer-sized TARGET_PARAM entry is emitted and nothing else is done.
  /// Otherwise the component lists of every map clause of the current
  /// executable directive that refer to the declaration are collected,
  /// sorted, checked against each other for overlaps and passed to
  /// generateInfoForComponentList.
  ///
  /// \param Cap Capture to process; must not capture a variable array type.
  /// \param Arg Value passed for the capture; used as both base pointer and
  /// pointer of the is_device_ptr entry.
  /// \param CombinedInfo Receives the generated entries.
  /// \param PartialStruct Forwarded to generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable, or null when the
    // capture is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list: (components, map type, map modifiers,
    // is-implicit, mapper, variable reference expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists that refer to VD from all map clauses.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI is advanced in step with the component lists visited for VD.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists by the 'present' modifier and the 'alloc' map type.
    // NOTE(review): HasPresent is computed from LHS's modifiers but HasAllocs
    // from RHS's map type, while HasPresentR comes from RHS and HasAllocsR from
    // LHS. With this mix the comparator returns true for both (A, B) and
    // (B, A) when, e.g., A is 'present' + 'alloc' and B is neither, so it does
    // not look like a strict weak ordering. Confirm the intended order before
    // changing anything here.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Keys point at elements of DeclComponentLists; each value collects the
    // component lists that overlap with the key's list.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Only compare L against the lists that follow it, so each pair is
      // examined once.
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // This overwrites MapType, MapModifiers, etc. with L1's values; only
        // Components1 is used in the rest of this loop body.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists in reverse order while the components agree in
        // statement class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It is the first component of the longer list that has no
          // counterpart in the exhausted one.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The exhausted list is the base; the longer list is recorded as
          // overlapping with it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout is only filled when there is overlapped data; it holds the fields
    // of the record type reached by stripping pointer/array levels from VD's
    // type.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      // Repeat until getPointeeOrArrayElementType() returns the type itself.
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common leading part of both lists (walking them in
            // reverse order, as in the overlap detection above).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two different fields: order by field index
            // within the same parent, otherwise by which field comes first in
            // Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists already emitted by the loop above are keys of OverlappedData.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared on every iteration, including those where the list was
      // skipped above.
      IsFirstComponentList = false;
    }
  }
9113 
9114   /// Generate the default map information for a given capture \a CI,
9115   /// record field declaration \a RI and captured value \a CV.
9116   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9117                               const FieldDecl &RI, llvm::Value *CV,
9118                               MapCombinedInfoTy &CombinedInfo) const {
9119     bool IsImplicit = true;
9120     // Do the default mapping.
9121     if (CI.capturesThis()) {
9122       CombinedInfo.Exprs.push_back(nullptr);
9123       CombinedInfo.BasePointers.push_back(CV);
9124       CombinedInfo.Pointers.push_back(CV);
9125       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9126       CombinedInfo.Sizes.push_back(
9127           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9128                                     CGF.Int64Ty, /*isSigned=*/true));
9129       // Default map type.
9130       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9131     } else if (CI.capturesVariableByCopy()) {
9132       const VarDecl *VD = CI.getCapturedVar();
9133       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9134       CombinedInfo.BasePointers.push_back(CV);
9135       CombinedInfo.Pointers.push_back(CV);
9136       if (!RI.getType()->isAnyPointerType()) {
9137         // We have to signal to the runtime captures passed by value that are
9138         // not pointers.
9139         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9140         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9141             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9142       } else {
9143         // Pointers are implicitly mapped with a zero size and no flags
9144         // (other than first map that is added for all implicit maps).
9145         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9146         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9147       }
9148       auto I = FirstPrivateDecls.find(VD);
9149       if (I != FirstPrivateDecls.end())
9150         IsImplicit = I->getSecond();
9151     } else {
9152       assert(CI.capturesVariable() && "Expected captured reference.");
9153       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9154       QualType ElementType = PtrTy->getPointeeType();
9155       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9156           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9157       // The default map type for a scalar/complex type is 'to' because by
9158       // default the value doesn't have to be retrieved. For an aggregate
9159       // type, the default is 'tofrom'.
9160       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9161       const VarDecl *VD = CI.getCapturedVar();
9162       auto I = FirstPrivateDecls.find(VD);
9163       if (I != FirstPrivateDecls.end() &&
9164           VD->getType().isConstant(CGF.getContext())) {
9165         llvm::Constant *Addr =
9166             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9167         // Copy the value of the original variable to the new global copy.
9168         CGF.Builder.CreateMemCpy(
9169             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9170             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9171             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9172         // Use new global variable as the base pointers.
9173         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9174         CombinedInfo.BasePointers.push_back(Addr);
9175         CombinedInfo.Pointers.push_back(Addr);
9176       } else {
9177         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9178         CombinedInfo.BasePointers.push_back(CV);
9179         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9180           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9181               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9182               AlignmentSource::Decl));
9183           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9184         } else {
9185           CombinedInfo.Pointers.push_back(CV);
9186         }
9187       }
9188       if (I != FirstPrivateDecls.end())
9189         IsImplicit = I->getSecond();
9190     }
9191     // Every default map produces a single argument which is a target parameter.
9192     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9193 
9194     // Add flag stating this is an implicit map.
9195     if (IsImplicit)
9196       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9197 
9198     // No user-defined mapper for default mapping.
9199     CombinedInfo.Mappers.push_back(nullptr);
9200   }
9201 };
9202 } // anonymous namespace
9203 
9204 static void emitNonContiguousDescriptor(
9205     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9206     CGOpenMPRuntime::TargetDataInfo &Info) {
9207   CodeGenModule &CGM = CGF.CGM;
9208   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9209       &NonContigInfo = CombinedInfo.NonContigInfo;
9210 
9211   // Build an array of struct descriptor_dim and then assign it to
9212   // offload_args.
9213   //
9214   // struct descriptor_dim {
9215   //  uint64_t offset;
9216   //  uint64_t count;
9217   //  uint64_t stride
9218   // };
9219   ASTContext &C = CGF.getContext();
9220   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9221   RecordDecl *RD;
9222   RD = C.buildImplicitRecord("descriptor_dim");
9223   RD->startDefinition();
9224   addFieldToRecordDecl(C, RD, Int64Ty);
9225   addFieldToRecordDecl(C, RD, Int64Ty);
9226   addFieldToRecordDecl(C, RD, Int64Ty);
9227   RD->completeDefinition();
9228   QualType DimTy = C.getRecordType(RD);
9229 
9230   enum { OffsetFD = 0, CountFD, StrideFD };
9231   // We need two index variable here since the size of "Dims" is the same as the
9232   // size of Components, however, the size of offset, count, and stride is equal
9233   // to the size of base declaration that is non-contiguous.
9234   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9235     // Skip emitting ir if dimension size is 1 since it cannot be
9236     // non-contiguous.
9237     if (NonContigInfo.Dims[I] == 1)
9238       continue;
9239     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9240     QualType ArrayTy =
9241         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9242     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9243     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9244       unsigned RevIdx = EE - II - 1;
9245       LValue DimsLVal = CGF.MakeAddrLValue(
9246           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9247       // Offset
9248       LValue OffsetLVal = CGF.EmitLValueForField(
9249           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9250       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9251       // Count
9252       LValue CountLVal = CGF.EmitLValueForField(
9253           DimsLVal, *std::next(RD->field_begin(), CountFD));
9254       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9255       // Stride
9256       LValue StrideLVal = CGF.EmitLValueForField(
9257           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9258       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9259     }
9260     // args[I] = &dims
9261     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9262         DimsAddr, CGM.Int8PtrTy);
9263     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9264         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9265         Info.PointersArray, 0, I);
9266     Address PAddr(P, CGF.getPointerAlign());
9267     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9268     ++L;
9269   }
9270 }
9271 
9272 /// Emit a string constant containing the names of the values mapped to the
9273 /// offloading runtime library.
9274 llvm::Constant *
9275 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9276                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9277   llvm::Constant *SrcLocStr;
9278   if (!MapExprs.getMapDecl()) {
9279     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9280   } else {
9281     std::string ExprName = "";
9282     if (MapExprs.getMapExpr()) {
9283       PrintingPolicy P(CGF.getContext().getLangOpts());
9284       llvm::raw_string_ostream OS(ExprName);
9285       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9286       OS.flush();
9287     } else {
9288       ExprName = MapExprs.getMapDecl()->getNameAsString();
9289     }
9290 
9291     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9292     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9293     const char *FileName = PLoc.getFilename();
9294     unsigned Line = PLoc.getLine();
9295     unsigned Column = PLoc.getColumn();
9296     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9297                                                 Line, Column);
9298   }
9299   return SrcLocStr;
9300 }
9301 
9302 /// Emit the arrays used to pass the captures and map information to the
9303 /// offloading runtime library. If there is no map or capture information,
9304 /// return nullptr by reference.
9305 static void emitOffloadingArrays(
9306     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9307     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9308     bool IsNonContiguous = false) {
9309   CodeGenModule &CGM = CGF.CGM;
9310   ASTContext &Ctx = CGF.getContext();
9311 
9312   // Reset the array information.
9313   Info.clearArrayInfo();
9314   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9315 
9316   if (Info.NumberOfPtrs) {
9317     // Detect if we have any capture size requiring runtime evaluation of the
9318     // size so that a constant array could be eventually used.
9319     bool hasRuntimeEvaluationCaptureSize = false;
9320     for (llvm::Value *S : CombinedInfo.Sizes)
9321       if (!isa<llvm::Constant>(S)) {
9322         hasRuntimeEvaluationCaptureSize = true;
9323         break;
9324       }
9325 
9326     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9327     QualType PointerArrayType = Ctx.getConstantArrayType(
9328         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9329         /*IndexTypeQuals=*/0);
9330 
9331     Info.BasePointersArray =
9332         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9333     Info.PointersArray =
9334         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9335     Address MappersArray =
9336         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9337     Info.MappersArray = MappersArray.getPointer();
9338 
9339     // If we don't have any VLA types or other types that require runtime
9340     // evaluation, we can use a constant array for the map sizes, otherwise we
9341     // need to fill up the arrays as we do for the pointers.
9342     QualType Int64Ty =
9343         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9344     if (hasRuntimeEvaluationCaptureSize) {
9345       QualType SizeArrayType = Ctx.getConstantArrayType(
9346           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9347           /*IndexTypeQuals=*/0);
9348       Info.SizesArray =
9349           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9350     } else {
9351       // We expect all the sizes to be constant, so we collect them to create
9352       // a constant array.
9353       SmallVector<llvm::Constant *, 16> ConstSizes;
9354       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9355         if (IsNonContiguous &&
9356             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9357           ConstSizes.push_back(llvm::ConstantInt::get(
9358               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9359         } else {
9360           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9361         }
9362       }
9363 
9364       auto *SizesArrayInit = llvm::ConstantArray::get(
9365           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9366       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9367       auto *SizesArrayGbl = new llvm::GlobalVariable(
9368           CGM.getModule(), SizesArrayInit->getType(),
9369           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9370           SizesArrayInit, Name);
9371       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9372       Info.SizesArray = SizesArrayGbl;
9373     }
9374 
9375     // The map types are always constant so we don't need to generate code to
9376     // fill arrays. Instead, we create an array constant.
9377     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9378     llvm::copy(CombinedInfo.Types, Mapping.begin());
9379     llvm::Constant *MapTypesArrayInit =
9380         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9381     std::string MaptypesName =
9382         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9383     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9384         CGM.getModule(), MapTypesArrayInit->getType(),
9385         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9386         MapTypesArrayInit, MaptypesName);
9387     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9388     Info.MapTypesArray = MapTypesArrayGbl;
9389 
9390     // The information types are only built if there is debug information
9391     // requested.
9392     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9393       Info.MapNamesArray = llvm::Constant::getNullValue(
9394           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9395     } else {
9396       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9397         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9398       };
9399       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9400       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9401 
9402       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9403           llvm::ArrayType::get(
9404               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9405               CombinedInfo.Exprs.size()),
9406           InfoMap);
9407       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9408           CGM.getModule(), MapNamesArrayInit->getType(),
9409           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9410           MapNamesArrayInit,
9411           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9412       Info.MapNamesArray = MapNamesArrayGbl;
9413     }
9414 
9415     // If there's a present map type modifier, it must not be applied to the end
9416     // of a region, so generate a separate map type array in that case.
9417     if (Info.separateBeginEndCalls()) {
9418       bool EndMapTypesDiffer = false;
9419       for (uint64_t &Type : Mapping) {
9420         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9421           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9422           EndMapTypesDiffer = true;
9423         }
9424       }
9425       if (EndMapTypesDiffer) {
9426         MapTypesArrayInit =
9427             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9428         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9429         MapTypesArrayGbl = new llvm::GlobalVariable(
9430             CGM.getModule(), MapTypesArrayInit->getType(),
9431             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9432             MapTypesArrayInit, MaptypesName);
9433         MapTypesArrayGbl->setUnnamedAddr(
9434             llvm::GlobalValue::UnnamedAddr::Global);
9435         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9436       }
9437     }
9438 
9439     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9440       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9441       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9442           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9443           Info.BasePointersArray, 0, I);
9444       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9445           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9446       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9447       CGF.Builder.CreateStore(BPVal, BPAddr);
9448 
9449       if (Info.requiresDevicePointerInfo())
9450         if (const ValueDecl *DevVD =
9451                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9452           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9453 
9454       llvm::Value *PVal = CombinedInfo.Pointers[I];
9455       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9456           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9457           Info.PointersArray, 0, I);
9458       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9459           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9460       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9461       CGF.Builder.CreateStore(PVal, PAddr);
9462 
9463       if (hasRuntimeEvaluationCaptureSize) {
9464         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9465             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9466             Info.SizesArray,
9467             /*Idx0=*/0,
9468             /*Idx1=*/I);
9469         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9470         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9471                                                           CGM.Int64Ty,
9472                                                           /*isSigned=*/true),
9473                                 SAddr);
9474       }
9475 
9476       // Fill up the mapper array.
9477       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9478       if (CombinedInfo.Mappers[I]) {
9479         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9480             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9481         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9482         Info.HasMapper = true;
9483       }
9484       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9485       CGF.Builder.CreateStore(MFunc, MAddr);
9486     }
9487   }
9488 
9489   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9490       Info.NumberOfPtrs == 0)
9491     return;
9492 
9493   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9494 }
9495 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// Leaves every option at its default.
  ArgumentsOptions() = default;
  /// Intentionally implicit so that a plain bool can be passed as options.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}

  /// Whether the arguments are emitted for the call ending a region.
  bool ForEndCall = false;
};
} // namespace
9504 
9505 /// Emit the arguments to be passed to the runtime library based on the
9506 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9507 /// ForEndCall, emit map types to be passed for the end of the region instead of
9508 /// the beginning.
9509 static void emitOffloadingArraysArgument(
9510     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9511     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9512     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9513     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9514     const ArgumentsOptions &Options = ArgumentsOptions()) {
9515   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9516          "expected region end call to runtime only when end call is separate");
9517   CodeGenModule &CGM = CGF.CGM;
9518   if (Info.NumberOfPtrs) {
9519     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9520         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9521         Info.BasePointersArray,
9522         /*Idx0=*/0, /*Idx1=*/0);
9523     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9524         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9525         Info.PointersArray,
9526         /*Idx0=*/0,
9527         /*Idx1=*/0);
9528     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9529         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9530         /*Idx0=*/0, /*Idx1=*/0);
9531     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9532         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9533         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9534                                                     : Info.MapTypesArray,
9535         /*Idx0=*/0,
9536         /*Idx1=*/0);
9537 
9538     // Only emit the mapper information arrays if debug information is
9539     // requested.
9540     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9541       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9542     else
9543       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9544           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9545           Info.MapNamesArray,
9546           /*Idx0=*/0,
9547           /*Idx1=*/0);
9548     // If there is no user-defined mapper, set the mapper array to nullptr to
9549     // avoid an unnecessary data privatization
9550     if (!Info.HasMapper)
9551       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9552     else
9553       MappersArrayArg =
9554           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9555   } else {
9556     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9557     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9558     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9559     MapTypesArrayArg =
9560         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9561     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9562     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9563   }
9564 }
9565 
9566 /// Check for inner distribute directive.
9567 static const OMPExecutableDirective *
9568 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9569   const auto *CS = D.getInnermostCapturedStmt();
9570   const auto *Body =
9571       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9572   const Stmt *ChildStmt =
9573       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9574 
9575   if (const auto *NestedDir =
9576           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9577     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9578     switch (D.getDirectiveKind()) {
9579     case OMPD_target:
9580       if (isOpenMPDistributeDirective(DKind))
9581         return NestedDir;
9582       if (DKind == OMPD_teams) {
9583         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9584             /*IgnoreCaptured=*/true);
9585         if (!Body)
9586           return nullptr;
9587         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9588         if (const auto *NND =
9589                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9590           DKind = NND->getDirectiveKind();
9591           if (isOpenMPDistributeDirective(DKind))
9592             return NND;
9593         }
9594       }
9595       return nullptr;
9596     case OMPD_target_teams:
9597       if (isOpenMPDistributeDirective(DKind))
9598         return NestedDir;
9599       return nullptr;
9600     case OMPD_target_parallel:
9601     case OMPD_target_simd:
9602     case OMPD_target_parallel_for:
9603     case OMPD_target_parallel_for_simd:
9604       return nullptr;
9605     case OMPD_target_teams_distribute:
9606     case OMPD_target_teams_distribute_simd:
9607     case OMPD_target_teams_distribute_parallel_for:
9608     case OMPD_target_teams_distribute_parallel_for_simd:
9609     case OMPD_parallel:
9610     case OMPD_for:
9611     case OMPD_parallel_for:
9612     case OMPD_parallel_master:
9613     case OMPD_parallel_sections:
9614     case OMPD_for_simd:
9615     case OMPD_parallel_for_simd:
9616     case OMPD_cancel:
9617     case OMPD_cancellation_point:
9618     case OMPD_ordered:
9619     case OMPD_threadprivate:
9620     case OMPD_allocate:
9621     case OMPD_task:
9622     case OMPD_simd:
9623     case OMPD_tile:
9624     case OMPD_sections:
9625     case OMPD_section:
9626     case OMPD_single:
9627     case OMPD_master:
9628     case OMPD_critical:
9629     case OMPD_taskyield:
9630     case OMPD_barrier:
9631     case OMPD_taskwait:
9632     case OMPD_taskgroup:
9633     case OMPD_atomic:
9634     case OMPD_flush:
9635     case OMPD_depobj:
9636     case OMPD_scan:
9637     case OMPD_teams:
9638     case OMPD_target_data:
9639     case OMPD_target_exit_data:
9640     case OMPD_target_enter_data:
9641     case OMPD_distribute:
9642     case OMPD_distribute_simd:
9643     case OMPD_distribute_parallel_for:
9644     case OMPD_distribute_parallel_for_simd:
9645     case OMPD_teams_distribute:
9646     case OMPD_teams_distribute_simd:
9647     case OMPD_teams_distribute_parallel_for:
9648     case OMPD_teams_distribute_parallel_for_simd:
9649     case OMPD_target_update:
9650     case OMPD_declare_simd:
9651     case OMPD_declare_variant:
9652     case OMPD_begin_declare_variant:
9653     case OMPD_end_declare_variant:
9654     case OMPD_declare_target:
9655     case OMPD_end_declare_target:
9656     case OMPD_declare_reduction:
9657     case OMPD_declare_mapper:
9658     case OMPD_taskloop:
9659     case OMPD_taskloop_simd:
9660     case OMPD_master_taskloop:
9661     case OMPD_master_taskloop_simd:
9662     case OMPD_parallel_master_taskloop:
9663     case OMPD_parallel_master_taskloop_simd:
9664     case OMPD_requires:
9665     case OMPD_unknown:
9666     default:
9667       llvm_unreachable("Unexpected directive.");
9668     }
9669   }
9670 
9671   return nullptr;
9672 }
9673 
9674 /// Emit the user-defined mapper function. The code generation follows the
9675 /// pattern in the example below.
9676 /// \code
9677 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9678 ///                                           void *base, void *begin,
9679 ///                                           int64_t size, int64_t type,
9680 ///                                           void *name = nullptr) {
9681 ///   // Allocate space for an array section first or add a base/begin for
9682 ///   // pointer dereference.
9683 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9684 ///       !maptype.IsDelete)
9685 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9686 ///                                 size*sizeof(Ty), clearToFromMember(type));
9687 ///   // Map members.
9688 ///   for (unsigned i = 0; i < size; i++) {
9689 ///     // For each component specified by this mapper:
9690 ///     for (auto c : begin[i]->all_components) {
9691 ///       if (c.hasMapper())
9692 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9693 ///                       c.arg_type, c.arg_name);
9694 ///       else
9695 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9696 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9697 ///                                     c.arg_name);
9698 ///     }
9699 ///   }
9700 ///   // Delete the array section.
9701 ///   if (size > 1 && maptype.IsDelete)
9702 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9703 ///                                 size*sizeof(Ty), clearToFromMember(type));
9704 /// }
9705 /// \endcode
9706 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9707                                             CodeGenFunction *CGF) {
9708   if (UDMMap.count(D) > 0)
9709     return;
9710   ASTContext &C = CGM.getContext();
9711   QualType Ty = D->getType();
9712   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9713   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9714   auto *MapperVarDecl =
9715       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9716   SourceLocation Loc = D->getLocation();
9717   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9718 
9719   // Prepare mapper function arguments and attributes.
9720   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9721                               C.VoidPtrTy, ImplicitParamDecl::Other);
9722   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9723                             ImplicitParamDecl::Other);
9724   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9725                              C.VoidPtrTy, ImplicitParamDecl::Other);
9726   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9727                             ImplicitParamDecl::Other);
9728   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9729                             ImplicitParamDecl::Other);
9730   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9731                             ImplicitParamDecl::Other);
9732   FunctionArgList Args;
9733   Args.push_back(&HandleArg);
9734   Args.push_back(&BaseArg);
9735   Args.push_back(&BeginArg);
9736   Args.push_back(&SizeArg);
9737   Args.push_back(&TypeArg);
9738   Args.push_back(&NameArg);
9739   const CGFunctionInfo &FnInfo =
9740       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9741   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9742   SmallString<64> TyStr;
9743   llvm::raw_svector_ostream Out(TyStr);
9744   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9745   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9746   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9747                                     Name, &CGM.getModule());
9748   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9749   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9750   // Start the mapper function code generation.
9751   CodeGenFunction MapperCGF(CGM);
9752   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9753   // Compute the starting and end addresses of array elements.
9754   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9755       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9756       C.getPointerType(Int64Ty), Loc);
9757   // Prepare common arguments for array initiation and deletion.
9758   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9759       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9760       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9761   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9762       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9763       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9764   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9765       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9766       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9767   // Convert the size in bytes into the number of array elements.
9768   Size = MapperCGF.Builder.CreateExactUDiv(
9769       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9770   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9771       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9772   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9773   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9774       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9775       C.getPointerType(Int64Ty), Loc);
9776   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9777       MapperCGF.GetAddrOfLocalVar(&NameArg),
9778       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9779 
9780   // Emit array initiation if this is an array section and \p MapType indicates
9781   // that memory allocation is required.
9782   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9783   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9784                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9785 
9786   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9787 
9788   // Emit the loop header block.
9789   MapperCGF.EmitBlock(HeadBB);
9790   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9791   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9792   // Evaluate whether the initial condition is satisfied.
9793   llvm::Value *IsEmpty =
9794       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9795   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9796   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9797 
9798   // Emit the loop body block.
9799   MapperCGF.EmitBlock(BodyBB);
9800   llvm::BasicBlock *LastBB = BodyBB;
9801   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9802       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9803   PtrPHI->addIncoming(PtrBegin, EntryBB);
9804   Address PtrCurrent =
9805       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9806                           .getAlignment()
9807                           .alignmentOfArrayElement(ElementSize));
9808   // Privatize the declared variable of mapper to be the current array element.
9809   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9810   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9811   (void)Scope.Privatize();
9812 
9813   // Get map clause information. Fill up the arrays with all mapped variables.
9814   MappableExprsHandler::MapCombinedInfoTy Info;
9815   MappableExprsHandler MEHandler(*D, MapperCGF);
9816   MEHandler.generateAllInfoForMapper(Info);
9817 
9818   // Call the runtime API __tgt_mapper_num_components to get the number of
9819   // pre-existing components.
9820   llvm::Value *OffloadingArgs[] = {Handle};
9821   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9822       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9823                                             OMPRTL___tgt_mapper_num_components),
9824       OffloadingArgs);
9825   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9826       PreviousSize,
9827       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9828 
9829   // Fill up the runtime mapper handle for all components.
9830   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9831     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9832         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9833     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9834         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9835     llvm::Value *CurSizeArg = Info.Sizes[I];
9836     llvm::Value *CurNameArg =
9837         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9838             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9839             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9840 
9841     // Extract the MEMBER_OF field from the map type.
9842     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9843     llvm::Value *MemberMapType =
9844         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9845 
9846     // Combine the map type inherited from user-defined mapper with that
9847     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9848     // bits of the \a MapType, which is the input argument of the mapper
9849     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9850     // bits of MemberMapType.
9851     // [OpenMP 5.0], 1.2.6. map-type decay.
9852     //        | alloc |  to   | from  | tofrom | release | delete
9853     // ----------------------------------------------------------
9854     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9855     // to     | alloc |  to   | alloc |   to   | release | delete
9856     // from   | alloc | alloc | from  |  from  | release | delete
9857     // tofrom | alloc |  to   | from  | tofrom | release | delete
9858     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9859         MapType,
9860         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9861                                    MappableExprsHandler::OMP_MAP_FROM));
9862     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9863     llvm::BasicBlock *AllocElseBB =
9864         MapperCGF.createBasicBlock("omp.type.alloc.else");
9865     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9866     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9867     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9868     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9869     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9870     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9871     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9872     MapperCGF.EmitBlock(AllocBB);
9873     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9874         MemberMapType,
9875         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9876                                      MappableExprsHandler::OMP_MAP_FROM)));
9877     MapperCGF.Builder.CreateBr(EndBB);
9878     MapperCGF.EmitBlock(AllocElseBB);
9879     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9880         LeftToFrom,
9881         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9882     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9883     // In case of to, clear OMP_MAP_FROM.
9884     MapperCGF.EmitBlock(ToBB);
9885     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9886         MemberMapType,
9887         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9888     MapperCGF.Builder.CreateBr(EndBB);
9889     MapperCGF.EmitBlock(ToElseBB);
9890     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9891         LeftToFrom,
9892         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9893     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9894     // In case of from, clear OMP_MAP_TO.
9895     MapperCGF.EmitBlock(FromBB);
9896     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9897         MemberMapType,
9898         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9899     // In case of tofrom, do nothing.
9900     MapperCGF.EmitBlock(EndBB);
9901     LastBB = EndBB;
9902     llvm::PHINode *CurMapType =
9903         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9904     CurMapType->addIncoming(AllocMapType, AllocBB);
9905     CurMapType->addIncoming(ToMapType, ToBB);
9906     CurMapType->addIncoming(FromMapType, FromBB);
9907     CurMapType->addIncoming(MemberMapType, ToElseBB);
9908 
9909     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9910                                      CurSizeArg, CurMapType, CurNameArg};
9911     if (Info.Mappers[I]) {
9912       // Call the corresponding mapper function.
9913       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9914           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9915       assert(MapperFunc && "Expect a valid mapper function is available.");
9916       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9917     } else {
9918       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9919       // data structure.
9920       MapperCGF.EmitRuntimeCall(
9921           OMPBuilder.getOrCreateRuntimeFunction(
9922               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9923           OffloadingArgs);
9924     }
9925   }
9926 
9927   // Update the pointer to point to the next element that needs to be mapped,
9928   // and check whether we have mapped all elements.
9929   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9930       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9931   PtrPHI->addIncoming(PtrNext, LastBB);
9932   llvm::Value *IsDone =
9933       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9934   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9935   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9936 
9937   MapperCGF.EmitBlock(ExitBB);
9938   // Emit array deletion if this is an array section and \p MapType indicates
9939   // that deletion is required.
9940   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9941                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9942 
9943   // Emit the function exit block.
9944   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9945   MapperCGF.FinishFunction();
9946   UDMMap.try_emplace(D, Fn);
9947   if (CGF) {
9948     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9949     Decls.second.push_back(D);
9950   }
9951 }
9952 
9953 /// Emit the array initialization or deletion portion for user-defined mapper
9954 /// code generation. First, it evaluates whether an array section is mapped and
9955 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9956 /// true, and \a MapType indicates to not delete this array, array
9957 /// initialization code is generated. If \a IsInit is false, and \a MapType
9958 /// indicates to not this array, array deletion code is generated.
9959 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9960     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9961     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9962     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9963     bool IsInit) {
9964   StringRef Prefix = IsInit ? ".init" : ".del";
9965 
9966   // Evaluate if this is an array section.
9967   llvm::BasicBlock *BodyBB =
9968       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9969   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9970       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9971   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9972       MapType,
9973       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9974   llvm::Value *DeleteCond;
9975   llvm::Value *Cond;
9976   if (IsInit) {
9977     // base != begin?
9978     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9979         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9980     // IsPtrAndObj?
9981     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9982         MapType,
9983         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9984     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9985     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9986     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9987     DeleteCond = MapperCGF.Builder.CreateIsNull(
9988         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9989   } else {
9990     Cond = IsArray;
9991     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9992         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9993   }
9994   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9995   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9996 
9997   MapperCGF.EmitBlock(BodyBB);
9998   // Get the array size by multiplying element size and element number (i.e., \p
9999   // Size).
10000   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10001       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10002   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10003   // memory allocation/deletion purpose only.
10004   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10005       MapType,
10006       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10007                                    MappableExprsHandler::OMP_MAP_FROM)));
10008   MapTypeArg = MapperCGF.Builder.CreateOr(
10009       MapTypeArg,
10010       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10011 
10012   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10013   // data structure.
10014   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10015                                    ArraySize, MapTypeArg, MapName};
10016   MapperCGF.EmitRuntimeCall(
10017       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10018                                             OMPRTL___tgt_push_mapper_component),
10019       OffloadingArgs);
10020 }
10021 
10022 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10023     const OMPDeclareMapperDecl *D) {
10024   auto I = UDMMap.find(D);
10025   if (I != UDMMap.end())
10026     return I->second;
10027   emitUserDefinedMapper(D);
10028   return UDMMap.lookup(D);
10029 }
10030 
10031 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10032     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10033     llvm::Value *DeviceID,
10034     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10035                                      const OMPLoopDirective &D)>
10036         SizeEmitter) {
10037   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10038   const OMPExecutableDirective *TD = &D;
10039   // Get nested teams distribute kind directive, if any.
10040   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10041     TD = getNestedDistributeDirective(CGM.getContext(), D);
10042   if (!TD)
10043     return;
10044   const auto *LD = cast<OMPLoopDirective>(TD);
10045   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10046                                                          PrePostActionTy &) {
10047     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10048       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10049       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10050       CGF.EmitRuntimeCall(
10051           OMPBuilder.getOrCreateRuntimeFunction(
10052               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10053           Args);
10054     }
10055   };
10056   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10057 }
10058 
10059 void CGOpenMPRuntime::emitTargetCall(
10060     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10061     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10062     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10063     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10064                                      const OMPLoopDirective &D)>
10065         SizeEmitter) {
10066   if (!CGF.HaveInsertPoint())
10067     return;
10068 
10069   assert(OutlinedFn && "Invalid outlined function!");
10070 
10071   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10072                                  D.hasClausesOfKind<OMPNowaitClause>();
10073   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10074   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10075   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10076                                             PrePostActionTy &) {
10077     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10078   };
10079   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10080 
10081   CodeGenFunction::OMPTargetDataInfo InputInfo;
10082   llvm::Value *MapTypesArray = nullptr;
10083   llvm::Value *MapNamesArray = nullptr;
10084   // Fill up the pointer arrays and transfer execution to the device.
10085   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10086                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10087                     &CapturedVars,
10088                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10089     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10090       // Reverse offloading is not supported, so just execute on the host.
10091       if (RequiresOuterTask) {
10092         CapturedVars.clear();
10093         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10094       }
10095       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10096       return;
10097     }
10098 
10099     // On top of the arrays that were filled up, the target offloading call
10100     // takes as arguments the device id as well as the host pointer. The host
10101     // pointer is used by the runtime library to identify the current target
10102     // region, so it only has to be unique and not necessarily point to
10103     // anything. It could be the pointer to the outlined function that
10104     // implements the target region, but we aren't using that so that the
10105     // compiler doesn't need to keep that, and could therefore inline the host
10106     // function if proven worthwhile during optimization.
10107 
10108     // From this point on, we need to have an ID of the target region defined.
10109     assert(OutlinedFnID && "Invalid outlined function ID!");
10110 
10111     // Emit device ID if any.
10112     llvm::Value *DeviceID;
10113     if (Device.getPointer()) {
10114       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10115               Device.getInt() == OMPC_DEVICE_device_num) &&
10116              "Expected device_num modifier.");
10117       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10118       DeviceID =
10119           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10120     } else {
10121       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10122     }
10123 
10124     // Emit the number of elements in the offloading arrays.
10125     llvm::Value *PointerNum =
10126         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10127 
10128     // Return value of the runtime offloading call.
10129     llvm::Value *Return;
10130 
10131     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10132     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10133 
10134     // Source location for the ident struct
10135     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10136 
10137     // Emit tripcount for the target loop-based directive.
10138     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10139 
10140     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10141     // The target region is an outlined function launched by the runtime
10142     // via calls __tgt_target() or __tgt_target_teams().
10143     //
10144     // __tgt_target() launches a target region with one team and one thread,
10145     // executing a serial region.  This master thread may in turn launch
10146     // more threads within its team upon encountering a parallel region,
10147     // however, no additional teams can be launched on the device.
10148     //
10149     // __tgt_target_teams() launches a target region with one or more teams,
10150     // each with one or more threads.  This call is required for target
10151     // constructs such as:
10152     //  'target teams'
10153     //  'target' / 'teams'
10154     //  'target teams distribute parallel for'
10155     //  'target parallel'
10156     // and so on.
10157     //
10158     // Note that on the host and CPU targets, the runtime implementation of
10159     // these calls simply call the outlined function without forking threads.
10160     // The outlined functions themselves have runtime calls to
10161     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10162     // the compiler in emitTeamsCall() and emitParallelCall().
10163     //
10164     // In contrast, on the NVPTX target, the implementation of
10165     // __tgt_target_teams() launches a GPU kernel with the requested number
10166     // of teams and threads so no additional calls to the runtime are required.
10167     if (NumTeams) {
10168       // If we have NumTeams defined this means that we have an enclosed teams
10169       // region. Therefore we also expect to have NumThreads defined. These two
10170       // values should be defined in the presence of a teams directive,
10171       // regardless of having any clauses associated. If the user is using teams
10172       // but no clauses, these two values will be the default that should be
10173       // passed to the runtime library - a 32-bit integer with the value zero.
10174       assert(NumThreads && "Thread limit expression should be available along "
10175                            "with number of teams.");
10176       llvm::Value *OffloadingArgs[] = {RTLoc,
10177                                        DeviceID,
10178                                        OutlinedFnID,
10179                                        PointerNum,
10180                                        InputInfo.BasePointersArray.getPointer(),
10181                                        InputInfo.PointersArray.getPointer(),
10182                                        InputInfo.SizesArray.getPointer(),
10183                                        MapTypesArray,
10184                                        MapNamesArray,
10185                                        InputInfo.MappersArray.getPointer(),
10186                                        NumTeams,
10187                                        NumThreads};
10188       Return = CGF.EmitRuntimeCall(
10189           OMPBuilder.getOrCreateRuntimeFunction(
10190               CGM.getModule(), HasNowait
10191                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10192                                    : OMPRTL___tgt_target_teams_mapper),
10193           OffloadingArgs);
10194     } else {
10195       llvm::Value *OffloadingArgs[] = {RTLoc,
10196                                        DeviceID,
10197                                        OutlinedFnID,
10198                                        PointerNum,
10199                                        InputInfo.BasePointersArray.getPointer(),
10200                                        InputInfo.PointersArray.getPointer(),
10201                                        InputInfo.SizesArray.getPointer(),
10202                                        MapTypesArray,
10203                                        MapNamesArray,
10204                                        InputInfo.MappersArray.getPointer()};
10205       Return = CGF.EmitRuntimeCall(
10206           OMPBuilder.getOrCreateRuntimeFunction(
10207               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10208                                          : OMPRTL___tgt_target_mapper),
10209           OffloadingArgs);
10210     }
10211 
10212     // Check the error code and execute the host version if required.
10213     llvm::BasicBlock *OffloadFailedBlock =
10214         CGF.createBasicBlock("omp_offload.failed");
10215     llvm::BasicBlock *OffloadContBlock =
10216         CGF.createBasicBlock("omp_offload.cont");
10217     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10218     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10219 
10220     CGF.EmitBlock(OffloadFailedBlock);
10221     if (RequiresOuterTask) {
10222       CapturedVars.clear();
10223       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10224     }
10225     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10226     CGF.EmitBranch(OffloadContBlock);
10227 
10228     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10229   };
10230 
10231   // Notify that the host version must be executed.
10232   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10233                     RequiresOuterTask](CodeGenFunction &CGF,
10234                                        PrePostActionTy &) {
10235     if (RequiresOuterTask) {
10236       CapturedVars.clear();
10237       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10238     }
10239     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10240   };
10241 
10242   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10243                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10244                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10245     // Fill up the arrays with all the captured variables.
10246     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10247 
10248     // Get mappable expression information.
10249     MappableExprsHandler MEHandler(D, CGF);
10250     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10251     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10252 
10253     auto RI = CS.getCapturedRecordDecl()->field_begin();
10254     auto *CV = CapturedVars.begin();
10255     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10256                                               CE = CS.capture_end();
10257          CI != CE; ++CI, ++RI, ++CV) {
10258       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10259       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10260 
10261       // VLA sizes are passed to the outlined region by copy and do not have map
10262       // information associated.
10263       if (CI->capturesVariableArrayType()) {
10264         CurInfo.Exprs.push_back(nullptr);
10265         CurInfo.BasePointers.push_back(*CV);
10266         CurInfo.Pointers.push_back(*CV);
10267         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10268             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10269         // Copy to the device as an argument. No need to retrieve it.
10270         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10271                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10272                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10273         CurInfo.Mappers.push_back(nullptr);
10274       } else {
10275         // If we have any information in the map clause, we use it, otherwise we
10276         // just do a default mapping.
10277         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10278         if (!CI->capturesThis())
10279           MappedVarSet.insert(CI->getCapturedVar());
10280         else
10281           MappedVarSet.insert(nullptr);
10282         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10283           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10284         // Generate correct mapping for variables captured by reference in
10285         // lambdas.
10286         if (CI->capturesVariable())
10287           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10288                                                   CurInfo, LambdaPointers);
10289       }
10290       // We expect to have at least an element of information for this capture.
10291       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10292              "Non-existing map pointer for capture!");
10293       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10294              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10295              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10296              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10297              "Inconsistent map information sizes!");
10298 
10299       // If there is an entry in PartialStruct it means we have a struct with
10300       // individual members mapped. Emit an extra combined entry.
10301       if (PartialStruct.Base.isValid()) {
10302         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10303         MEHandler.emitCombinedEntry(
10304             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10305             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10306       }
10307 
10308       // We need to append the results of this capture to what we already have.
10309       CombinedInfo.append(CurInfo);
10310     }
10311     // Adjust MEMBER_OF flags for the lambdas captures.
10312     MEHandler.adjustMemberOfForLambdaCaptures(
10313         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10314         CombinedInfo.Types);
10315     // Map any list items in a map clause that were not captures because they
10316     // weren't referenced within the construct.
10317     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10318 
10319     TargetDataInfo Info;
10320     // Fill up the arrays and create the arguments.
10321     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10322     emitOffloadingArraysArgument(
10323         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10324         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10325         {/*ForEndTask=*/false});
10326 
10327     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10328     InputInfo.BasePointersArray =
10329         Address(Info.BasePointersArray, CGM.getPointerAlign());
10330     InputInfo.PointersArray =
10331         Address(Info.PointersArray, CGM.getPointerAlign());
10332     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10333     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10334     MapTypesArray = Info.MapTypesArray;
10335     MapNamesArray = Info.MapNamesArray;
10336     if (RequiresOuterTask)
10337       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10338     else
10339       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10340   };
10341 
10342   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10343                              CodeGenFunction &CGF, PrePostActionTy &) {
10344     if (RequiresOuterTask) {
10345       CodeGenFunction::OMPTargetDataInfo InputInfo;
10346       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10347     } else {
10348       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10349     }
10350   };
10351 
10352   // If we have a target function ID it means that we need to support
10353   // offloading, otherwise, just execute on the host. We need to execute on host
10354   // regardless of the conditional in the if clause if, e.g., the user do not
10355   // specify target triples.
10356   if (OutlinedFnID) {
10357     if (IfCond) {
10358       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10359     } else {
10360       RegionCodeGenTy ThenRCG(TargetThenGen);
10361       ThenRCG(CGF);
10362     }
10363   } else {
10364     RegionCodeGenTy ElseRCG(TargetElseGen);
10365     ElseRCG(CGF);
10366   }
10367 }
10368 
10369 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10370                                                     StringRef ParentName) {
10371   if (!S)
10372     return;
10373 
10374   // Codegen OMP target directives that offload compute to the device.
10375   bool RequiresDeviceCodegen =
10376       isa<OMPExecutableDirective>(S) &&
10377       isOpenMPTargetExecutionDirective(
10378           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10379 
10380   if (RequiresDeviceCodegen) {
10381     const auto &E = *cast<OMPExecutableDirective>(S);
10382     unsigned DeviceID;
10383     unsigned FileID;
10384     unsigned Line;
10385     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10386                              FileID, Line);
10387 
10388     // Is this a target region that should not be emitted as an entry point? If
10389     // so just signal we are done with this target region.
10390     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10391                                                             ParentName, Line))
10392       return;
10393 
10394     switch (E.getDirectiveKind()) {
10395     case OMPD_target:
10396       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10397                                                    cast<OMPTargetDirective>(E));
10398       break;
10399     case OMPD_target_parallel:
10400       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10401           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10402       break;
10403     case OMPD_target_teams:
10404       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10405           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10406       break;
10407     case OMPD_target_teams_distribute:
10408       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10409           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10410       break;
10411     case OMPD_target_teams_distribute_simd:
10412       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10413           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10414       break;
10415     case OMPD_target_parallel_for:
10416       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10417           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10418       break;
10419     case OMPD_target_parallel_for_simd:
10420       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10421           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10422       break;
10423     case OMPD_target_simd:
10424       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10425           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10426       break;
10427     case OMPD_target_teams_distribute_parallel_for:
10428       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10429           CGM, ParentName,
10430           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10431       break;
10432     case OMPD_target_teams_distribute_parallel_for_simd:
10433       CodeGenFunction::
10434           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10435               CGM, ParentName,
10436               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10437       break;
10438     case OMPD_parallel:
10439     case OMPD_for:
10440     case OMPD_parallel_for:
10441     case OMPD_parallel_master:
10442     case OMPD_parallel_sections:
10443     case OMPD_for_simd:
10444     case OMPD_parallel_for_simd:
10445     case OMPD_cancel:
10446     case OMPD_cancellation_point:
10447     case OMPD_ordered:
10448     case OMPD_threadprivate:
10449     case OMPD_allocate:
10450     case OMPD_task:
10451     case OMPD_simd:
10452     case OMPD_tile:
10453     case OMPD_sections:
10454     case OMPD_section:
10455     case OMPD_single:
10456     case OMPD_master:
10457     case OMPD_critical:
10458     case OMPD_taskyield:
10459     case OMPD_barrier:
10460     case OMPD_taskwait:
10461     case OMPD_taskgroup:
10462     case OMPD_atomic:
10463     case OMPD_flush:
10464     case OMPD_depobj:
10465     case OMPD_scan:
10466     case OMPD_teams:
10467     case OMPD_target_data:
10468     case OMPD_target_exit_data:
10469     case OMPD_target_enter_data:
10470     case OMPD_distribute:
10471     case OMPD_distribute_simd:
10472     case OMPD_distribute_parallel_for:
10473     case OMPD_distribute_parallel_for_simd:
10474     case OMPD_teams_distribute:
10475     case OMPD_teams_distribute_simd:
10476     case OMPD_teams_distribute_parallel_for:
10477     case OMPD_teams_distribute_parallel_for_simd:
10478     case OMPD_target_update:
10479     case OMPD_declare_simd:
10480     case OMPD_declare_variant:
10481     case OMPD_begin_declare_variant:
10482     case OMPD_end_declare_variant:
10483     case OMPD_declare_target:
10484     case OMPD_end_declare_target:
10485     case OMPD_declare_reduction:
10486     case OMPD_declare_mapper:
10487     case OMPD_taskloop:
10488     case OMPD_taskloop_simd:
10489     case OMPD_master_taskloop:
10490     case OMPD_master_taskloop_simd:
10491     case OMPD_parallel_master_taskloop:
10492     case OMPD_parallel_master_taskloop_simd:
10493     case OMPD_requires:
10494     case OMPD_unknown:
10495     default:
10496       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10497     }
10498     return;
10499   }
10500 
10501   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10502     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10503       return;
10504 
10505     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10506     return;
10507   }
10508 
10509   // If this is a lambda function, look into its body.
10510   if (const auto *L = dyn_cast<LambdaExpr>(S))
10511     S = L->getBody();
10512 
10513   // Keep looking for target regions recursively.
10514   for (const Stmt *II : S->children())
10515     scanForTargetRegionsFunctions(II, ParentName);
10516 }
10517 
10518 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10519   // If emitting code for the host, we do not process FD here. Instead we do
10520   // the normal code generation.
10521   if (!CGM.getLangOpts().OpenMPIsDevice) {
10522     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10523       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10524           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10525       // Do not emit device_type(nohost) functions for the host.
10526       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10527         return true;
10528     }
10529     return false;
10530   }
10531 
10532   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10533   // Try to detect target regions in the function.
10534   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10535     StringRef Name = CGM.getMangledName(GD);
10536     scanForTargetRegionsFunctions(FD->getBody(), Name);
10537     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10538         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10539     // Do not emit device_type(nohost) functions for the host.
10540     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10541       return true;
10542   }
10543 
10544   // Do not to emit function if it is not marked as declare target.
10545   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10546          AlreadyEmittedTargetDecls.count(VD) == 0;
10547 }
10548 
10549 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10550   if (!CGM.getLangOpts().OpenMPIsDevice)
10551     return false;
10552 
10553   // Check if there are Ctors/Dtors in this declaration and look for target
10554   // regions in it. We use the complete variant to produce the kernel name
10555   // mangling.
10556   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10557   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10558     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10559       StringRef ParentName =
10560           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10561       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10562     }
10563     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10564       StringRef ParentName =
10565           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10566       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10567     }
10568   }
10569 
10570   // Do not to emit variable if it is not marked as declare target.
10571   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10572       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10573           cast<VarDecl>(GD.getDecl()));
10574   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10575       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10576        HasRequiresUnifiedSharedMemory)) {
10577     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10578     return true;
10579   }
10580   return false;
10581 }
10582 
10583 llvm::Constant *
10584 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10585                                                 const VarDecl *VD) {
10586   assert(VD->getType().isConstant(CGM.getContext()) &&
10587          "Expected constant variable.");
10588   StringRef VarName;
10589   llvm::Constant *Addr;
10590   llvm::GlobalValue::LinkageTypes Linkage;
10591   QualType Ty = VD->getType();
10592   SmallString<128> Buffer;
10593   {
10594     unsigned DeviceID;
10595     unsigned FileID;
10596     unsigned Line;
10597     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10598                              FileID, Line);
10599     llvm::raw_svector_ostream OS(Buffer);
10600     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10601        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10602     VarName = OS.str();
10603   }
10604   Linkage = llvm::GlobalValue::InternalLinkage;
10605   Addr =
10606       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10607                                   getDefaultFirstprivateAddressSpace());
10608   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10609   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10610   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10611   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10612       VarName, Addr, VarSize,
10613       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10614   return Addr;
10615 }
10616 
10617 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10618                                                    llvm::Constant *Addr) {
10619   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10620       !CGM.getLangOpts().OpenMPIsDevice)
10621     return;
10622   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10623       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10624   if (!Res) {
10625     if (CGM.getLangOpts().OpenMPIsDevice) {
10626       // Register non-target variables being emitted in device code (debug info
10627       // may cause this).
10628       StringRef VarName = CGM.getMangledName(VD);
10629       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10630     }
10631     return;
10632   }
10633   // Register declare target variables.
10634   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10635   StringRef VarName;
10636   CharUnits VarSize;
10637   llvm::GlobalValue::LinkageTypes Linkage;
10638 
10639   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10640       !HasRequiresUnifiedSharedMemory) {
10641     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10642     VarName = CGM.getMangledName(VD);
10643     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10644       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10645       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10646     } else {
10647       VarSize = CharUnits::Zero();
10648     }
10649     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10650     // Temp solution to prevent optimizations of the internal variables.
10651     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10652       std::string RefName = getName({VarName, "ref"});
10653       if (!CGM.GetGlobalValue(RefName)) {
10654         llvm::Constant *AddrRef =
10655             getOrCreateInternalVariable(Addr->getType(), RefName);
10656         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10657         GVAddrRef->setConstant(/*Val=*/true);
10658         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10659         GVAddrRef->setInitializer(Addr);
10660         CGM.addCompilerUsedGlobal(GVAddrRef);
10661       }
10662     }
10663   } else {
10664     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10665             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10666              HasRequiresUnifiedSharedMemory)) &&
10667            "Declare target attribute must link or to with unified memory.");
10668     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10669       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10670     else
10671       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10672 
10673     if (CGM.getLangOpts().OpenMPIsDevice) {
10674       VarName = Addr->getName();
10675       Addr = nullptr;
10676     } else {
10677       VarName = getAddrOfDeclareTargetVar(VD).getName();
10678       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10679     }
10680     VarSize = CGM.getPointerSize();
10681     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10682   }
10683 
10684   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10685       VarName, Addr, VarSize, Flags, Linkage);
10686 }
10687 
10688 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10689   if (isa<FunctionDecl>(GD.getDecl()) ||
10690       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10691     return emitTargetFunctions(GD);
10692 
10693   return emitTargetGlobalVariable(GD);
10694 }
10695 
10696 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10697   for (const VarDecl *VD : DeferredGlobalVariables) {
10698     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10699         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10700     if (!Res)
10701       continue;
10702     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10703         !HasRequiresUnifiedSharedMemory) {
10704       CGM.EmitGlobal(VD);
10705     } else {
10706       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10707               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10708                HasRequiresUnifiedSharedMemory)) &&
10709              "Expected link clause or to clause with unified memory.");
10710       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10711     }
10712   }
10713 }
10714 
/// Adjusts target-specific data for lambdas used in the target directive
/// \p D. This implementation emits nothing: it only verifies, in builds with
/// assertions enabled, that \p D is a target execution directive. \p CGF is
/// not used here.
// NOTE(review): presumably target-specific runtimes provide the real
// adjustment - confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10720 
10721 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10722   for (const OMPClause *Clause : D->clauselists()) {
10723     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10724       HasRequiresUnifiedSharedMemory = true;
10725     } else if (const auto *AC =
10726                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10727       switch (AC->getAtomicDefaultMemOrderKind()) {
10728       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10729         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10730         break;
10731       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10732         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10733         break;
10734       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10735         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10736         break;
10737       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10738         break;
10739       }
10740     }
10741   }
10742 }
10743 
/// Returns the atomic ordering stored in RequiresAtomicOrdering, i.e. the
/// value last selected by an atomic_default_mem_order clause in
/// processRequiresDirective(), or its initial value if no such clause was
/// processed.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10747 
10748 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10749                                                        LangAS &AS) {
10750   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10751     return false;
10752   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10753   switch(A->getAllocatorType()) {
10754   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10755   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10756   // Not supported, fallback to the default mem space.
10757   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10758   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10759   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10760   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10761   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10762   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10763   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10764     AS = LangAS::Default;
10765     return true;
10766   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10767     llvm_unreachable("Expected predefined allocator for the variables with the "
10768                      "static storage.");
10769   }
10770   return false;
10771 }
10772 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it sees a unified_shared_memory
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10776 
10777 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10778     CodeGenModule &CGM)
10779     : CGM(CGM) {
10780   if (CGM.getLangOpts().OpenMPIsDevice) {
10781     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10782     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10783   }
10784 }
10785 
10786 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10787   if (CGM.getLangOpts().OpenMPIsDevice)
10788     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10789 }
10790 
10791 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10792   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10793     return true;
10794 
10795   const auto *D = cast<FunctionDecl>(GD.getDecl());
10796   // Do not to emit function if it is marked as declare target as it was already
10797   // emitted.
10798   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10799     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10800       if (auto *F = dyn_cast_or_null<llvm::Function>(
10801               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10802         return !F->isDeclaration();
10803       return false;
10804     }
10805     return true;
10806   }
10807 
10808   return !AlreadyEmittedTargetDecls.insert(D).second;
10809 }
10810 
10811 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10812   // If we don't have entries or if we are emitting code for the device, we
10813   // don't need to do anything.
10814   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10815       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10816       (OffloadEntriesInfoManager.empty() &&
10817        !HasEmittedDeclareTargetRegion &&
10818        !HasEmittedTargetRegion))
10819     return nullptr;
10820 
10821   // Create and register the function that handles the requires directives.
10822   ASTContext &C = CGM.getContext();
10823 
10824   llvm::Function *RequiresRegFn;
10825   {
10826     CodeGenFunction CGF(CGM);
10827     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10828     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10829     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10830     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10831     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10832     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10833     // TODO: check for other requires clauses.
10834     // The requires directive takes effect only when a target region is
10835     // present in the compilation unit. Otherwise it is ignored and not
10836     // passed to the runtime. This avoids the runtime from throwing an error
10837     // for mismatching requires clauses across compilation units that don't
10838     // contain at least 1 target region.
10839     assert((HasEmittedTargetRegion ||
10840             HasEmittedDeclareTargetRegion ||
10841             !OffloadEntriesInfoManager.empty()) &&
10842            "Target or declare target region expected.");
10843     if (HasRequiresUnifiedSharedMemory)
10844       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10845     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10846                             CGM.getModule(), OMPRTL___tgt_register_requires),
10847                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10848     CGF.FinishFunction();
10849   }
10850   return RequiresRegFn;
10851 }
10852 
10853 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10854                                     const OMPExecutableDirective &D,
10855                                     SourceLocation Loc,
10856                                     llvm::Function *OutlinedFn,
10857                                     ArrayRef<llvm::Value *> CapturedVars) {
10858   if (!CGF.HaveInsertPoint())
10859     return;
10860 
10861   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10862   CodeGenFunction::RunCleanupsScope Scope(CGF);
10863 
10864   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10865   llvm::Value *Args[] = {
10866       RTLoc,
10867       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10868       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10869   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10870   RealArgs.append(std::begin(Args), std::end(Args));
10871   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10872 
10873   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10874       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10875   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10876 }
10877 
10878 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10879                                          const Expr *NumTeams,
10880                                          const Expr *ThreadLimit,
10881                                          SourceLocation Loc) {
10882   if (!CGF.HaveInsertPoint())
10883     return;
10884 
10885   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10886 
10887   llvm::Value *NumTeamsVal =
10888       NumTeams
10889           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10890                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10891           : CGF.Builder.getInt32(0);
10892 
10893   llvm::Value *ThreadLimitVal =
10894       ThreadLimit
10895           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10896                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10897           : CGF.Builder.getInt32(0);
10898 
10899   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10900   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10901                                      ThreadLimitVal};
10902   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10903                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10904                       PushNumTeamsArgs);
10905 }
10906 
/// Emits the code for a data-region directive \p D: a call to
/// __tgt_target_data_begin_mapper, the region body produced by \p CodeGen,
/// and a call to __tgt_target_data_end_mapper.
///
/// \param IfCond Condition of the 'if' clause, or null. When present, both
/// runtime calls are emitted under the condition.
/// \param Device Expression of the 'device' clause, or null; when null,
/// OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator for the body of the region.
/// \param Info Filled in by the begin sequence with the offloading arrays and
/// read again by the end sequence.
///
/// When \p Info reports captured device addresses (CaptureDeviceAddrMap is
/// not empty) the body is emitted right after the begin call, and once more
/// with privatization turned off in the 'else' branch of the 'if' clause.
/// Otherwise the body is emitted once, between the two runtime calls.
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Info is
  // captured by reference because the arrays set up here are used again in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; it is passed as a signed 64-bit integer.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Same arrays as for the begin call, but requested for the end call.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any; the device expression is evaluated again here.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the region: conditional on the 'if' clause when there is one.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the region: conditional on the 'if' clause when there is one.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11060 
/// Emits the runtime call for a standalone data directive \p D, which must be
/// 'target enter data', 'target exit data' or 'target update'.
///
/// \param IfCond Condition of the 'if' clause, or null. When present, the
/// whole sequence is emitted under the condition and nothing is emitted for
/// the false case.
/// \param Device Expression of the 'device' clause, or null; when null,
/// OMP_DEVICEID_UNDEF is passed to the runtime.
///
/// The *_nowait_mapper variant of the runtime entry point is used when \p D
/// has a 'nowait' clause. When \p D has a 'depend' or 'nowait' clause the
/// call is emitted through EmitOMPTargetTaskBasedDirective(), otherwise as an
/// inlined directive. Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared by the two lambdas below: TargetThenGen fills it in and
  // ThenGen, which captures it by reference, reads it when it runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the runtime call for the directive from the arguments prepared
  // in InputInfo, MapTypesArray and MapNamesArray.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; it is passed as a signed 64-bit integer.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is unexpected here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Prepare the offloading arrays, publish them through InputInfo /
  // MapTypesArray / MapNamesArray, and then emit ThenGen either inside an
  // outer task or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend' and 'nowait' both require wrapping the call in an outer task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Hand the prepared arrays over to ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the sequence is conditional and the 'else' branch
  // emits nothing; without one it is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11238 
11239 namespace {
11240   /// Kind of parameter in a function with 'declare simd' directive.
11241   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11242   /// Attribute set of the parameter.
11243   struct ParamAttrTy {
11244     ParamKindTy Kind = Vector;
11245     llvm::APSInt StrideOrArg;
11246     llvm::APSInt Alignment;
11247   };
11248 } // namespace
11249 
11250 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11251                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11252   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11253   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11254   // of that clause. The VLEN value must be power of 2.
11255   // In other case the notion of the function`s "characteristic data type" (CDT)
11256   // is used to compute the vector length.
11257   // CDT is defined in the following order:
11258   //   a) For non-void function, the CDT is the return type.
11259   //   b) If the function has any non-uniform, non-linear parameters, then the
11260   //   CDT is the type of the first such parameter.
11261   //   c) If the CDT determined by a) or b) above is struct, union, or class
11262   //   type which is pass-by-value (except for the type that maps to the
11263   //   built-in complex data type), the characteristic data type is int.
11264   //   d) If none of the above three cases is applicable, the CDT is int.
11265   // The VLEN is then determined based on the CDT and the size of vector
11266   // register of that ISA for which current vector version is generated. The
11267   // VLEN is computed using the formula below:
11268   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11269   // where vector register size specified in section 3.2.1 Registers and the
11270   // Stack Frame of original AMD64 ABI document.
11271   QualType RetType = FD->getReturnType();
11272   if (RetType.isNull())
11273     return 0;
11274   ASTContext &C = FD->getASTContext();
11275   QualType CDT;
11276   if (!RetType.isNull() && !RetType->isVoidType()) {
11277     CDT = RetType;
11278   } else {
11279     unsigned Offset = 0;
11280     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11281       if (ParamAttrs[Offset].Kind == Vector)
11282         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11283       ++Offset;
11284     }
11285     if (CDT.isNull()) {
11286       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11287         if (ParamAttrs[I + Offset].Kind == Vector) {
11288           CDT = FD->getParamDecl(I)->getType();
11289           break;
11290         }
11291       }
11292     }
11293   }
11294   if (CDT.isNull())
11295     CDT = C.IntTy;
11296   CDT = CDT->getCanonicalTypeUnqualified();
11297   if (CDT->isRecordType() || CDT->isUnionType())
11298     CDT = C.IntTy;
11299   return C.getTypeSize(CDT);
11300 }
11301 
11302 static void
11303 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11304                            const llvm::APSInt &VLENVal,
11305                            ArrayRef<ParamAttrTy> ParamAttrs,
11306                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11307   struct ISADataTy {
11308     char ISA;
11309     unsigned VecRegSize;
11310   };
11311   ISADataTy ISAData[] = {
11312       {
11313           'b', 128
11314       }, // SSE
11315       {
11316           'c', 256
11317       }, // AVX
11318       {
11319           'd', 256
11320       }, // AVX2
11321       {
11322           'e', 512
11323       }, // AVX512
11324   };
11325   llvm::SmallVector<char, 2> Masked;
11326   switch (State) {
11327   case OMPDeclareSimdDeclAttr::BS_Undefined:
11328     Masked.push_back('N');
11329     Masked.push_back('M');
11330     break;
11331   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11332     Masked.push_back('N');
11333     break;
11334   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11335     Masked.push_back('M');
11336     break;
11337   }
11338   for (char Mask : Masked) {
11339     for (const ISADataTy &Data : ISAData) {
11340       SmallString<256> Buffer;
11341       llvm::raw_svector_ostream Out(Buffer);
11342       Out << "_ZGV" << Data.ISA << Mask;
11343       if (!VLENVal) {
11344         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11345         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11346         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11347       } else {
11348         Out << VLENVal;
11349       }
11350       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11351         switch (ParamAttr.Kind){
11352         case LinearWithVarStride:
11353           Out << 's' << ParamAttr.StrideOrArg;
11354           break;
11355         case Linear:
11356           Out << 'l';
11357           if (ParamAttr.StrideOrArg != 1)
11358             Out << ParamAttr.StrideOrArg;
11359           break;
11360         case Uniform:
11361           Out << 'u';
11362           break;
11363         case Vector:
11364           Out << 'v';
11365           break;
11366         }
11367         if (!!ParamAttr.Alignment)
11368           Out << 'a' << ParamAttr.Alignment;
11369       }
11370       Out << '_' << Fn->getName();
11371       Fn->addFnAttr(Out.str());
11372     }
11373   }
11374 }
11375 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11381 
11382 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11383 ///
11384 /// TODO: Need to implement the behavior for reference marked with a
11385 /// var or no linear modifiers (1.b in the section). For this, we
11386 /// need to extend ParamKindTy to support the linear modifiers.
11387 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11388   QT = QT.getCanonicalType();
11389 
11390   if (QT->isVoidType())
11391     return false;
11392 
11393   if (Kind == ParamKindTy::Uniform)
11394     return false;
11395 
11396   if (Kind == ParamKindTy::Linear)
11397     return false;
11398 
11399   // TODO: Handle linear references with modifiers
11400 
11401   if (Kind == ParamKindTy::LinearWithVarStride)
11402     return false;
11403 
11404   return true;
11405 }
11406 
11407 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11408 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11409   QT = QT.getCanonicalType();
11410   unsigned Size = C.getTypeSize(QT);
11411 
11412   // Only scalars and complex within 16 bytes wide set PVB to true.
11413   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11414     return false;
11415 
11416   if (QT->isFloatingType())
11417     return true;
11418 
11419   if (QT->isIntegerType())
11420     return true;
11421 
11422   if (QT->isPointerType())
11423     return true;
11424 
11425   // TODO: Add support for complex types (section 3.1.2, item 2).
11426 
11427   return false;
11428 }
11429 
11430 /// Computes the lane size (LS) of a return type or of an input parameter,
11431 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11432 /// TODO: Add support for references, section 3.2.1, item 1.
11433 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11434   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11435     QualType PTy = QT.getCanonicalType()->getPointeeType();
11436     if (getAArch64PBV(PTy, C))
11437       return C.getTypeSize(PTy);
11438   }
11439   if (getAArch64PBV(QT, C))
11440     return C.getTypeSize(QT);
11441 
11442   return C.getTypeSize(C.getUIntPtrType());
11443 }
11444 
11445 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11446 // signature of the scalar function, as defined in 3.2.2 of the
11447 // AAVFABI.
11448 static std::tuple<unsigned, unsigned, bool>
11449 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11450   QualType RetType = FD->getReturnType().getCanonicalType();
11451 
11452   ASTContext &C = FD->getASTContext();
11453 
11454   bool OutputBecomesInput = false;
11455 
11456   llvm::SmallVector<unsigned, 8> Sizes;
11457   if (!RetType->isVoidType()) {
11458     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11459     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11460       OutputBecomesInput = true;
11461   }
11462   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11463     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11464     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11465   }
11466 
11467   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11468   // The LS of a function parameter / return value can only be a power
11469   // of 2, starting from 8 bits, up to 128.
11470   assert(std::all_of(Sizes.begin(), Sizes.end(),
11471                      [](unsigned Size) {
11472                        return Size == 8 || Size == 16 || Size == 32 ||
11473                               Size == 64 || Size == 128;
11474                      }) &&
11475          "Invalid size");
11476 
11477   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11478                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11479                          OutputBecomesInput);
11480 }
11481 
11482 /// Mangle the parameter part of the vector function name according to
11483 /// their OpenMP classification. The mangling function is defined in
11484 /// section 3.5 of the AAVFABI.
11485 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11486   SmallString<256> Buffer;
11487   llvm::raw_svector_ostream Out(Buffer);
11488   for (const auto &ParamAttr : ParamAttrs) {
11489     switch (ParamAttr.Kind) {
11490     case LinearWithVarStride:
11491       Out << "ls" << ParamAttr.StrideOrArg;
11492       break;
11493     case Linear:
11494       Out << 'l';
11495       // Don't print the step value if it is not present or if it is
11496       // equal to 1.
11497       if (ParamAttr.StrideOrArg != 1)
11498         Out << ParamAttr.StrideOrArg;
11499       break;
11500     case Uniform:
11501       Out << 'u';
11502       break;
11503     case Vector:
11504       Out << 'v';
11505       break;
11506     }
11507 
11508     if (!!ParamAttr.Alignment)
11509       Out << 'a' << ParamAttr.Alignment;
11510   }
11511 
11512   return std::string(Out.str());
11513 }
11514 
11515 // Function used to add the attribute. The parameter `VLEN` is
11516 // templated to allow the use of "x" when targeting scalable functions
11517 // for SVE.
11518 template <typename T>
11519 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11520                                  char ISA, StringRef ParSeq,
11521                                  StringRef MangledName, bool OutputBecomesInput,
11522                                  llvm::Function *Fn) {
11523   SmallString<256> Buffer;
11524   llvm::raw_svector_ostream Out(Buffer);
11525   Out << Prefix << ISA << LMask << VLEN;
11526   if (OutputBecomesInput)
11527     Out << "v";
11528   Out << ParSeq << "_" << MangledName;
11529   Fn->addFnAttr(Out.str());
11530 }
11531 
11532 // Helper function to generate the Advanced SIMD names depending on
11533 // the value of the NDS when simdlen is not present.
11534 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11535                                       StringRef Prefix, char ISA,
11536                                       StringRef ParSeq, StringRef MangledName,
11537                                       bool OutputBecomesInput,
11538                                       llvm::Function *Fn) {
11539   switch (NDS) {
11540   case 8:
11541     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11542                          OutputBecomesInput, Fn);
11543     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11544                          OutputBecomesInput, Fn);
11545     break;
11546   case 16:
11547     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11548                          OutputBecomesInput, Fn);
11549     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11550                          OutputBecomesInput, Fn);
11551     break;
11552   case 32:
11553     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11554                          OutputBecomesInput, Fn);
11555     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11556                          OutputBecomesInput, Fn);
11557     break;
11558   case 64:
11559   case 128:
11560     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11561                          OutputBecomesInput, Fn);
11562     break;
11563   default:
11564     llvm_unreachable("Scalar type is too wide.");
11565   }
11566 }
11567 
11568 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11569 static void emitAArch64DeclareSimdFunction(
11570     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11571     ArrayRef<ParamAttrTy> ParamAttrs,
11572     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11573     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11574 
11575   // Get basic data for building the vector signature.
11576   const auto Data = getNDSWDS(FD, ParamAttrs);
11577   const unsigned NDS = std::get<0>(Data);
11578   const unsigned WDS = std::get<1>(Data);
11579   const bool OutputBecomesInput = std::get<2>(Data);
11580 
11581   // Check the values provided via `simdlen` by the user.
11582   // 1. A `simdlen(1)` doesn't produce vector signatures,
11583   if (UserVLEN == 1) {
11584     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11585         DiagnosticsEngine::Warning,
11586         "The clause simdlen(1) has no effect when targeting aarch64.");
11587     CGM.getDiags().Report(SLoc, DiagID);
11588     return;
11589   }
11590 
11591   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11592   // Advanced SIMD output.
11593   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11594     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11595         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11596                                     "power of 2 when targeting Advanced SIMD.");
11597     CGM.getDiags().Report(SLoc, DiagID);
11598     return;
11599   }
11600 
11601   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11602   // limits.
11603   if (ISA == 's' && UserVLEN != 0) {
11604     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11605       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11606           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11607                                       "lanes in the architectural constraints "
11608                                       "for SVE (min is 128-bit, max is "
11609                                       "2048-bit, by steps of 128-bit)");
11610       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11611       return;
11612     }
11613   }
11614 
11615   // Sort out parameter sequence.
11616   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11617   StringRef Prefix = "_ZGV";
11618   // Generate simdlen from user input (if any).
11619   if (UserVLEN) {
11620     if (ISA == 's') {
11621       // SVE generates only a masked function.
11622       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11623                            OutputBecomesInput, Fn);
11624     } else {
11625       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11626       // Advanced SIMD generates one or two functions, depending on
11627       // the `[not]inbranch` clause.
11628       switch (State) {
11629       case OMPDeclareSimdDeclAttr::BS_Undefined:
11630         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11631                              OutputBecomesInput, Fn);
11632         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11633                              OutputBecomesInput, Fn);
11634         break;
11635       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11636         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11637                              OutputBecomesInput, Fn);
11638         break;
11639       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11640         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11641                              OutputBecomesInput, Fn);
11642         break;
11643       }
11644     }
11645   } else {
11646     // If no user simdlen is provided, follow the AAVFABI rules for
11647     // generating the vector length.
11648     if (ISA == 's') {
11649       // SVE, section 3.4.1, item 1.
11650       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11651                            OutputBecomesInput, Fn);
11652     } else {
11653       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11654       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11655       // two vector names depending on the use of the clause
11656       // `[not]inbranch`.
11657       switch (State) {
11658       case OMPDeclareSimdDeclAttr::BS_Undefined:
11659         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11660                                   OutputBecomesInput, Fn);
11661         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11662                                   OutputBecomesInput, Fn);
11663         break;
11664       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11665         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11666                                   OutputBecomesInput, Fn);
11667         break;
11668       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11669         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11670                                   OutputBecomesInput, Fn);
11671         break;
11672       }
11673     }
11674   }
11675 }
11676 
11677 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11678                                               llvm::Function *Fn) {
11679   ASTContext &C = CGM.getContext();
11680   FD = FD->getMostRecentDecl();
11681   // Map params to their positions in function decl.
11682   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11683   if (isa<CXXMethodDecl>(FD))
11684     ParamPositions.try_emplace(FD, 0);
11685   unsigned ParamPos = ParamPositions.size();
11686   for (const ParmVarDecl *P : FD->parameters()) {
11687     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11688     ++ParamPos;
11689   }
11690   while (FD) {
11691     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11692       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11693       // Mark uniform parameters.
11694       for (const Expr *E : Attr->uniforms()) {
11695         E = E->IgnoreParenImpCasts();
11696         unsigned Pos;
11697         if (isa<CXXThisExpr>(E)) {
11698           Pos = ParamPositions[FD];
11699         } else {
11700           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11701                                 ->getCanonicalDecl();
11702           Pos = ParamPositions[PVD];
11703         }
11704         ParamAttrs[Pos].Kind = Uniform;
11705       }
11706       // Get alignment info.
11707       auto NI = Attr->alignments_begin();
11708       for (const Expr *E : Attr->aligneds()) {
11709         E = E->IgnoreParenImpCasts();
11710         unsigned Pos;
11711         QualType ParmTy;
11712         if (isa<CXXThisExpr>(E)) {
11713           Pos = ParamPositions[FD];
11714           ParmTy = E->getType();
11715         } else {
11716           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11717                                 ->getCanonicalDecl();
11718           Pos = ParamPositions[PVD];
11719           ParmTy = PVD->getType();
11720         }
11721         ParamAttrs[Pos].Alignment =
11722             (*NI)
11723                 ? (*NI)->EvaluateKnownConstInt(C)
11724                 : llvm::APSInt::getUnsigned(
11725                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11726                           .getQuantity());
11727         ++NI;
11728       }
11729       // Mark linear parameters.
11730       auto SI = Attr->steps_begin();
11731       auto MI = Attr->modifiers_begin();
11732       for (const Expr *E : Attr->linears()) {
11733         E = E->IgnoreParenImpCasts();
11734         unsigned Pos;
11735         // Rescaling factor needed to compute the linear parameter
11736         // value in the mangled name.
11737         unsigned PtrRescalingFactor = 1;
11738         if (isa<CXXThisExpr>(E)) {
11739           Pos = ParamPositions[FD];
11740         } else {
11741           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11742                                 ->getCanonicalDecl();
11743           Pos = ParamPositions[PVD];
11744           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11745             PtrRescalingFactor = CGM.getContext()
11746                                      .getTypeSizeInChars(P->getPointeeType())
11747                                      .getQuantity();
11748         }
11749         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11750         ParamAttr.Kind = Linear;
11751         // Assuming a stride of 1, for `linear` without modifiers.
11752         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11753         if (*SI) {
11754           Expr::EvalResult Result;
11755           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11756             if (const auto *DRE =
11757                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11758               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11759                 ParamAttr.Kind = LinearWithVarStride;
11760                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11761                     ParamPositions[StridePVD->getCanonicalDecl()]);
11762               }
11763             }
11764           } else {
11765             ParamAttr.StrideOrArg = Result.Val.getInt();
11766           }
11767         }
11768         // If we are using a linear clause on a pointer, we need to
11769         // rescale the value of linear_step with the byte size of the
11770         // pointee type.
11771         if (Linear == ParamAttr.Kind)
11772           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11773         ++SI;
11774         ++MI;
11775       }
11776       llvm::APSInt VLENVal;
11777       SourceLocation ExprLoc;
11778       const Expr *VLENExpr = Attr->getSimdlen();
11779       if (VLENExpr) {
11780         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11781         ExprLoc = VLENExpr->getExprLoc();
11782       }
11783       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11784       if (CGM.getTriple().isX86()) {
11785         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11786       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11787         unsigned VLEN = VLENVal.getExtValue();
11788         StringRef MangledName = Fn->getName();
11789         if (CGM.getTarget().hasFeature("sve"))
11790           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11791                                          MangledName, 's', 128, Fn, ExprLoc);
11792         if (CGM.getTarget().hasFeature("neon"))
11793           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11794                                          MangledName, 'n', 128, Fn, ExprLoc);
11795       }
11796     }
11797     FD = FD->getPreviousDecl();
11798   }
11799 }
11800 
11801 namespace {
11802 /// Cleanup action for doacross support.
11803 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11804 public:
11805   static const int DoacrossFinArgs = 2;
11806 
11807 private:
11808   llvm::FunctionCallee RTLFn;
11809   llvm::Value *Args[DoacrossFinArgs];
11810 
11811 public:
11812   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11813                     ArrayRef<llvm::Value *> CallArgs)
11814       : RTLFn(RTLFn) {
11815     assert(CallArgs.size() == DoacrossFinArgs);
11816     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11817   }
11818   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11819     if (!CGF.HaveInsertPoint())
11820       return;
11821     CGF.EmitRuntimeCall(RTLFn, Args);
11822   }
11823 };
11824 } // namespace
11825 
11826 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11827                                        const OMPLoopDirective &D,
11828                                        ArrayRef<Expr *> NumIterations) {
11829   if (!CGF.HaveInsertPoint())
11830     return;
11831 
11832   ASTContext &C = CGM.getContext();
11833   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11834   RecordDecl *RD;
11835   if (KmpDimTy.isNull()) {
11836     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11837     //  kmp_int64 lo; // lower
11838     //  kmp_int64 up; // upper
11839     //  kmp_int64 st; // stride
11840     // };
11841     RD = C.buildImplicitRecord("kmp_dim");
11842     RD->startDefinition();
11843     addFieldToRecordDecl(C, RD, Int64Ty);
11844     addFieldToRecordDecl(C, RD, Int64Ty);
11845     addFieldToRecordDecl(C, RD, Int64Ty);
11846     RD->completeDefinition();
11847     KmpDimTy = C.getRecordType(RD);
11848   } else {
11849     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11850   }
11851   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11852   QualType ArrayTy =
11853       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11854 
11855   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11856   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11857   enum { LowerFD = 0, UpperFD, StrideFD };
11858   // Fill dims with data.
11859   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11860     LValue DimsLVal = CGF.MakeAddrLValue(
11861         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11862     // dims.upper = num_iterations;
11863     LValue UpperLVal = CGF.EmitLValueForField(
11864         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11865     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11866         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11867         Int64Ty, NumIterations[I]->getExprLoc());
11868     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11869     // dims.stride = 1;
11870     LValue StrideLVal = CGF.EmitLValueForField(
11871         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11872     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11873                           StrideLVal);
11874   }
11875 
11876   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11877   // kmp_int32 num_dims, struct kmp_dim * dims);
11878   llvm::Value *Args[] = {
11879       emitUpdateLocation(CGF, D.getBeginLoc()),
11880       getThreadID(CGF, D.getBeginLoc()),
11881       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11882       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11883           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11884           CGM.VoidPtrTy)};
11885 
11886   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11887       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11888   CGF.EmitRuntimeCall(RTLFn, Args);
11889   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11890       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11891   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11892       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11893   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11894                                              llvm::makeArrayRef(FiniArgs));
11895 }
11896 
11897 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11898                                           const OMPDependClause *C) {
11899   QualType Int64Ty =
11900       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11901   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11902   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11903       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11904   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11905   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11906     const Expr *CounterVal = C->getLoopData(I);
11907     assert(CounterVal);
11908     llvm::Value *CntVal = CGF.EmitScalarConversion(
11909         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11910         CounterVal->getExprLoc());
11911     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11912                           /*Volatile=*/false, Int64Ty);
11913   }
11914   llvm::Value *Args[] = {
11915       emitUpdateLocation(CGF, C->getBeginLoc()),
11916       getThreadID(CGF, C->getBeginLoc()),
11917       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11918   llvm::FunctionCallee RTLFn;
11919   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11920     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11921                                                   OMPRTL___kmpc_doacross_post);
11922   } else {
11923     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11924     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11925                                                   OMPRTL___kmpc_doacross_wait);
11926   }
11927   CGF.EmitRuntimeCall(RTLFn, Args);
11928 }
11929 
11930 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11931                                llvm::FunctionCallee Callee,
11932                                ArrayRef<llvm::Value *> Args) const {
11933   assert(Loc.isValid() && "Outlined function call location must be valid.");
11934   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11935 
11936   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11937     if (Fn->doesNotThrow()) {
11938       CGF.EmitNounwindRuntimeCall(Fn, Args);
11939       return;
11940     }
11941   }
11942   CGF.EmitRuntimeCall(Callee, Args);
11943 }
11944 
11945 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11946     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11947     ArrayRef<llvm::Value *> Args) const {
11948   emitCall(CGF, Loc, OutlinedFn, Args);
11949 }
11950 
11951 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11952   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11953     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11954       HasEmittedDeclareTargetRegion = true;
11955 }
11956 
11957 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11958                                              const VarDecl *NativeParam,
11959                                              const VarDecl *TargetParam) const {
11960   return CGF.GetAddrOfLocalVar(NativeParam);
11961 }
11962 
11963 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11964                                                    const VarDecl *VD) {
11965   if (!VD)
11966     return Address::invalid();
11967   Address UntiedAddr = Address::invalid();
11968   Address UntiedRealAddr = Address::invalid();
11969   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11970   if (It != FunctionToUntiedTaskStackMap.end()) {
11971     const UntiedLocalVarsAddressesMap &UntiedData =
11972         UntiedLocalVarsStack[It->second];
11973     auto I = UntiedData.find(VD);
11974     if (I != UntiedData.end()) {
11975       UntiedAddr = I->second.first;
11976       UntiedRealAddr = I->second.second;
11977     }
11978   }
11979   const VarDecl *CVD = VD->getCanonicalDecl();
11980   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11981     // Use the default allocation.
11982     if (!isAllocatableDecl(VD))
11983       return UntiedAddr;
11984     llvm::Value *Size;
11985     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11986     if (CVD->getType()->isVariablyModifiedType()) {
11987       Size = CGF.getTypeSize(CVD->getType());
11988       // Align the size: ((size + align - 1) / align) * align
11989       Size = CGF.Builder.CreateNUWAdd(
11990           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11991       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11992       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11993     } else {
11994       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11995       Size = CGM.getSize(Sz.alignTo(Align));
11996     }
11997     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11998     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11999     assert(AA->getAllocator() &&
12000            "Expected allocator expression for non-default allocator.");
12001     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
12002     // According to the standard, the original allocator type is a enum
12003     // (integer). Convert to pointer type, if required.
12004     Allocator = CGF.EmitScalarConversion(
12005         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12006         AA->getAllocator()->getExprLoc());
12007     llvm::Value *Args[] = {ThreadID, Size, Allocator};
12008 
12009     llvm::Value *Addr =
12010         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12011                                 CGM.getModule(), OMPRTL___kmpc_alloc),
12012                             Args, getName({CVD->getName(), ".void.addr"}));
12013     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12014         CGM.getModule(), OMPRTL___kmpc_free);
12015     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12016     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12017         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12018     if (UntiedAddr.isValid())
12019       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12020 
12021     // Cleanup action for allocate support.
12022     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12023       llvm::FunctionCallee RTLFn;
12024       unsigned LocEncoding;
12025       Address Addr;
12026       const Expr *Allocator;
12027 
12028     public:
12029       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
12030                            Address Addr, const Expr *Allocator)
12031           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12032             Allocator(Allocator) {}
12033       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12034         if (!CGF.HaveInsertPoint())
12035           return;
12036         llvm::Value *Args[3];
12037         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12038             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12039         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12040             Addr.getPointer(), CGF.VoidPtrTy);
12041         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12042         // According to the standard, the original allocator type is a enum
12043         // (integer). Convert to pointer type, if required.
12044         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12045                                             CGF.getContext().VoidPtrTy,
12046                                             Allocator->getExprLoc());
12047         Args[2] = AllocVal;
12048 
12049         CGF.EmitRuntimeCall(RTLFn, Args);
12050       }
12051     };
12052     Address VDAddr =
12053         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12054     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12055         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12056         VDAddr, AA->getAllocator());
12057     if (UntiedRealAddr.isValid())
12058       if (auto *Region =
12059               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12060         Region->emitUntiedSwitch(CGF);
12061     return VDAddr;
12062   }
12063   return UntiedAddr;
12064 }
12065 
12066 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12067                                              const VarDecl *VD) const {
12068   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12069   if (It == FunctionToUntiedTaskStackMap.end())
12070     return false;
12071   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12072 }
12073 
12074 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12075     CodeGenModule &CGM, const OMPLoopDirective &S)
12076     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12077   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12078   if (!NeedToPush)
12079     return;
12080   NontemporalDeclsSet &DS =
12081       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12082   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12083     for (const Stmt *Ref : C->private_refs()) {
12084       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12085       const ValueDecl *VD;
12086       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12087         VD = DRE->getDecl();
12088       } else {
12089         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12090         assert((ME->isImplicitCXXThis() ||
12091                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12092                "Expected member of current class.");
12093         VD = ME->getMemberDecl();
12094       }
12095       DS.insert(VD);
12096     }
12097   }
12098 }
12099 
12100 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12101   if (!NeedToPush)
12102     return;
12103   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12104 }
12105 
12106 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12107     CodeGenFunction &CGF,
12108     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
12109                          std::pair<Address, Address>> &LocalVars)
12110     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12111   if (!NeedToPush)
12112     return;
12113   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12114       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12115   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12116 }
12117 
12118 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12119   if (!NeedToPush)
12120     return;
12121   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12122 }
12123 
12124 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12125   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12126 
12127   return llvm::any_of(
12128       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12129       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12130 }
12131 
12132 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12133     const OMPExecutableDirective &S,
12134     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12135     const {
12136   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12137   // Vars in target/task regions must be excluded completely.
12138   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12139       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12140     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12141     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12142     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12143     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12144       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12145         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12146     }
12147   }
12148   // Exclude vars in private clauses.
12149   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12150     for (const Expr *Ref : C->varlists()) {
12151       if (!Ref->getType()->isScalarType())
12152         continue;
12153       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12154       if (!DRE)
12155         continue;
12156       NeedToCheckForLPCs.insert(DRE->getDecl());
12157     }
12158   }
12159   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12160     for (const Expr *Ref : C->varlists()) {
12161       if (!Ref->getType()->isScalarType())
12162         continue;
12163       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12164       if (!DRE)
12165         continue;
12166       NeedToCheckForLPCs.insert(DRE->getDecl());
12167     }
12168   }
12169   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12170     for (const Expr *Ref : C->varlists()) {
12171       if (!Ref->getType()->isScalarType())
12172         continue;
12173       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12174       if (!DRE)
12175         continue;
12176       NeedToCheckForLPCs.insert(DRE->getDecl());
12177     }
12178   }
12179   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12180     for (const Expr *Ref : C->varlists()) {
12181       if (!Ref->getType()->isScalarType())
12182         continue;
12183       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12184       if (!DRE)
12185         continue;
12186       NeedToCheckForLPCs.insert(DRE->getDecl());
12187     }
12188   }
12189   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12190     for (const Expr *Ref : C->varlists()) {
12191       if (!Ref->getType()->isScalarType())
12192         continue;
12193       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12194       if (!DRE)
12195         continue;
12196       NeedToCheckForLPCs.insert(DRE->getDecl());
12197     }
12198   }
12199   for (const Decl *VD : NeedToCheckForLPCs) {
12200     for (const LastprivateConditionalData &Data :
12201          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12202       if (Data.DeclToUniqueName.count(VD) > 0) {
12203         if (!Data.Disabled)
12204           NeedToAddForLPCsAsDisabled.insert(VD);
12205         break;
12206       }
12207     }
12208   }
12209 }
12210 
12211 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12212     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12213     : CGM(CGF.CGM),
12214       Action((CGM.getLangOpts().OpenMP >= 50 &&
12215               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12216                            [](const OMPLastprivateClause *C) {
12217                              return C->getKind() ==
12218                                     OMPC_LASTPRIVATE_conditional;
12219                            }))
12220                  ? ActionToDo::PushAsLastprivateConditional
12221                  : ActionToDo::DoNotPush) {
12222   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12223   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12224     return;
12225   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12226          "Expected a push action.");
12227   LastprivateConditionalData &Data =
12228       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12229   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12230     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12231       continue;
12232 
12233     for (const Expr *Ref : C->varlists()) {
12234       Data.DeclToUniqueName.insert(std::make_pair(
12235           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12236           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12237     }
12238   }
12239   Data.IVLVal = IVLVal;
12240   Data.Fn = CGF.CurFn;
12241 }
12242 
12243 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12244     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12245     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12246   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12247   if (CGM.getLangOpts().OpenMP < 50)
12248     return;
12249   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12250   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12251   if (!NeedToAddForLPCsAsDisabled.empty()) {
12252     Action = ActionToDo::DisableLastprivateConditional;
12253     LastprivateConditionalData &Data =
12254         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12255     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12256       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12257     Data.Fn = CGF.CurFn;
12258     Data.Disabled = true;
12259   }
12260 }
12261 
12262 CGOpenMPRuntime::LastprivateConditionalRAII
12263 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12264     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12265   return LastprivateConditionalRAII(CGF, S);
12266 }
12267 
12268 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12269   if (CGM.getLangOpts().OpenMP < 50)
12270     return;
12271   if (Action == ActionToDo::DisableLastprivateConditional) {
12272     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12273            "Expected list of disabled private vars.");
12274     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12275   }
12276   if (Action == ActionToDo::PushAsLastprivateConditional) {
12277     assert(
12278         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12279         "Expected list of lastprivate conditional vars.");
12280     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12281   }
12282 }
12283 
12284 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12285                                                         const VarDecl *VD) {
12286   ASTContext &C = CGM.getContext();
12287   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12288   if (I == LastprivateConditionalToTypes.end())
12289     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12290   QualType NewType;
12291   const FieldDecl *VDField;
12292   const FieldDecl *FiredField;
12293   LValue BaseLVal;
12294   auto VI = I->getSecond().find(VD);
12295   if (VI == I->getSecond().end()) {
12296     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12297     RD->startDefinition();
12298     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12299     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12300     RD->completeDefinition();
12301     NewType = C.getRecordType(RD);
12302     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12303     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12304     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12305   } else {
12306     NewType = std::get<0>(VI->getSecond());
12307     VDField = std::get<1>(VI->getSecond());
12308     FiredField = std::get<2>(VI->getSecond());
12309     BaseLVal = std::get<3>(VI->getSecond());
12310   }
12311   LValue FiredLVal =
12312       CGF.EmitLValueForField(BaseLVal, FiredField);
12313   CGF.EmitStoreOfScalar(
12314       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12315       FiredLVal);
12316   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12317 }
12318 
12319 namespace {
12320 /// Checks if the lastprivate conditional variable is referenced in LHS.
12321 class LastprivateConditionalRefChecker final
12322     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12323   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12324   const Expr *FoundE = nullptr;
12325   const Decl *FoundD = nullptr;
12326   StringRef UniqueDeclName;
12327   LValue IVLVal;
12328   llvm::Function *FoundFn = nullptr;
12329   SourceLocation Loc;
12330 
12331 public:
12332   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12333     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12334          llvm::reverse(LPM)) {
12335       auto It = D.DeclToUniqueName.find(E->getDecl());
12336       if (It == D.DeclToUniqueName.end())
12337         continue;
12338       if (D.Disabled)
12339         return false;
12340       FoundE = E;
12341       FoundD = E->getDecl()->getCanonicalDecl();
12342       UniqueDeclName = It->second;
12343       IVLVal = D.IVLVal;
12344       FoundFn = D.Fn;
12345       break;
12346     }
12347     return FoundE == E;
12348   }
12349   bool VisitMemberExpr(const MemberExpr *E) {
12350     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12351       return false;
12352     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12353          llvm::reverse(LPM)) {
12354       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12355       if (It == D.DeclToUniqueName.end())
12356         continue;
12357       if (D.Disabled)
12358         return false;
12359       FoundE = E;
12360       FoundD = E->getMemberDecl()->getCanonicalDecl();
12361       UniqueDeclName = It->second;
12362       IVLVal = D.IVLVal;
12363       FoundFn = D.Fn;
12364       break;
12365     }
12366     return FoundE == E;
12367   }
12368   bool VisitStmt(const Stmt *S) {
12369     for (const Stmt *Child : S->children()) {
12370       if (!Child)
12371         continue;
12372       if (const auto *E = dyn_cast<Expr>(Child))
12373         if (!E->isGLValue())
12374           continue;
12375       if (Visit(Child))
12376         return true;
12377     }
12378     return false;
12379   }
12380   explicit LastprivateConditionalRefChecker(
12381       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12382       : LPM(LPM) {}
12383   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12384   getFoundData() const {
12385     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12386   }
12387 };
12388 } // namespace
12389 
12390 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12391                                                        LValue IVLVal,
12392                                                        StringRef UniqueDeclName,
12393                                                        LValue LVal,
12394                                                        SourceLocation Loc) {
12395   // Last updated loop counter for the lastprivate conditional var.
12396   // int<xx> last_iv = 0;
12397   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12398   llvm::Constant *LastIV =
12399       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12400   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12401       IVLVal.getAlignment().getAsAlign());
12402   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12403 
12404   // Last value of the lastprivate conditional.
12405   // decltype(priv_a) last_a;
12406   llvm::Constant *Last = getOrCreateInternalVariable(
12407       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12408   cast<llvm::GlobalVariable>(Last)->setAlignment(
12409       LVal.getAlignment().getAsAlign());
12410   LValue LastLVal =
12411       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12412 
12413   // Global loop counter. Required to handle inner parallel-for regions.
12414   // iv
12415   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12416 
12417   // #pragma omp critical(a)
12418   // if (last_iv <= iv) {
12419   //   last_iv = iv;
12420   //   last_a = priv_a;
12421   // }
12422   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12423                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12424     Action.Enter(CGF);
12425     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12426     // (last_iv <= iv) ? Check if the variable is updated and store new
12427     // value in global var.
12428     llvm::Value *CmpRes;
12429     if (IVLVal.getType()->isSignedIntegerType()) {
12430       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12431     } else {
12432       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12433              "Loop iteration variable must be integer.");
12434       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12435     }
12436     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12437     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12438     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12439     // {
12440     CGF.EmitBlock(ThenBB);
12441 
12442     //   last_iv = iv;
12443     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12444 
12445     //   last_a = priv_a;
12446     switch (CGF.getEvaluationKind(LVal.getType())) {
12447     case TEK_Scalar: {
12448       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12449       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12450       break;
12451     }
12452     case TEK_Complex: {
12453       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12454       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12455       break;
12456     }
12457     case TEK_Aggregate:
12458       llvm_unreachable(
12459           "Aggregates are not supported in lastprivate conditional.");
12460     }
12461     // }
12462     CGF.EmitBranch(ExitBB);
12463     // There is no need to emit line number for unconditional branch.
12464     (void)ApplyDebugLocation::CreateEmpty(CGF);
12465     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12466   };
12467 
12468   if (CGM.getLangOpts().OpenMPSimd) {
12469     // Do not emit as a critical region as no parallel region could be emitted.
12470     RegionCodeGenTy ThenRCG(CodeGen);
12471     ThenRCG(CGF);
12472   } else {
12473     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12474   }
12475 }
12476 
12477 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12478                                                          const Expr *LHS) {
12479   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12480     return;
12481   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12482   if (!Checker.Visit(LHS))
12483     return;
12484   const Expr *FoundE;
12485   const Decl *FoundD;
12486   StringRef UniqueDeclName;
12487   LValue IVLVal;
12488   llvm::Function *FoundFn;
12489   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12490       Checker.getFoundData();
12491   if (FoundFn != CGF.CurFn) {
12492     // Special codegen for inner parallel regions.
12493     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12494     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12495     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12496            "Lastprivate conditional is not found in outer region.");
12497     QualType StructTy = std::get<0>(It->getSecond());
12498     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12499     LValue PrivLVal = CGF.EmitLValue(FoundE);
12500     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12501         PrivLVal.getAddress(CGF),
12502         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12503     LValue BaseLVal =
12504         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12505     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12506     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12507                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12508                         FiredLVal, llvm::AtomicOrdering::Unordered,
12509                         /*IsVolatile=*/true, /*isInit=*/false);
12510     return;
12511   }
12512 
12513   // Private address of the lastprivate conditional in the current context.
12514   // priv_a
12515   LValue LVal = CGF.EmitLValue(FoundE);
12516   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12517                                    FoundE->getExprLoc());
12518 }
12519 
12520 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12521     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12522     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12523   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12524     return;
12525   auto Range = llvm::reverse(LastprivateConditionalStack);
12526   auto It = llvm::find_if(
12527       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12528   if (It == Range.end() || It->Fn != CGF.CurFn)
12529     return;
12530   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12531   assert(LPCI != LastprivateConditionalToTypes.end() &&
12532          "Lastprivates must be registered already.");
12533   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12534   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12535   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12536   for (const auto &Pair : It->DeclToUniqueName) {
12537     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12538     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12539       continue;
12540     auto I = LPCI->getSecond().find(Pair.first);
12541     assert(I != LPCI->getSecond().end() &&
12542            "Lastprivate must be rehistered already.");
12543     // bool Cmp = priv_a.Fired != 0;
12544     LValue BaseLVal = std::get<3>(I->getSecond());
12545     LValue FiredLVal =
12546         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12547     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12548     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12549     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12550     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12551     // if (Cmp) {
12552     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12553     CGF.EmitBlock(ThenBB);
12554     Address Addr = CGF.GetAddrOfLocalVar(VD);
12555     LValue LVal;
12556     if (VD->getType()->isReferenceType())
12557       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12558                                            AlignmentSource::Decl);
12559     else
12560       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12561                                 AlignmentSource::Decl);
12562     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12563                                      D.getBeginLoc());
12564     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12565     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12566     // }
12567   }
12568 }
12569 
12570 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12571     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12572     SourceLocation Loc) {
12573   if (CGF.getLangOpts().OpenMP < 50)
12574     return;
12575   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12576   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12577          "Unknown lastprivate conditional variable.");
12578   StringRef UniqueName = It->second;
12579   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12580   // The variable was not updated in the region - exit.
12581   if (!GV)
12582     return;
12583   LValue LPLVal = CGF.MakeAddrLValue(
12584       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12585   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12586   CGF.EmitStoreOfScalar(Res, PrivLVal);
12587 }
12588 
12589 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12590     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12591     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12592   llvm_unreachable("Not supported in SIMD-only mode");
12593 }
12594 
12595 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12596     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12597     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12598   llvm_unreachable("Not supported in SIMD-only mode");
12599 }
12600 
12601 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12602     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12603     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12604     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12605     bool Tied, unsigned &NumberOfParts) {
12606   llvm_unreachable("Not supported in SIMD-only mode");
12607 }
12608 
12609 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12610                                            SourceLocation Loc,
12611                                            llvm::Function *OutlinedFn,
12612                                            ArrayRef<llvm::Value *> CapturedVars,
12613                                            const Expr *IfCond) {
12614   llvm_unreachable("Not supported in SIMD-only mode");
12615 }
12616 
12617 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12618     CodeGenFunction &CGF, StringRef CriticalName,
12619     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12620     const Expr *Hint) {
12621   llvm_unreachable("Not supported in SIMD-only mode");
12622 }
12623 
12624 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12625                                            const RegionCodeGenTy &MasterOpGen,
12626                                            SourceLocation Loc) {
12627   llvm_unreachable("Not supported in SIMD-only mode");
12628 }
12629 
12630 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12631                                            const RegionCodeGenTy &MasterOpGen,
12632                                            SourceLocation Loc,
12633                                            const Expr *Filter) {
12634   llvm_unreachable("Not supported in SIMD-only mode");
12635 }
12636 
12637 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12638                                             SourceLocation Loc) {
12639   llvm_unreachable("Not supported in SIMD-only mode");
12640 }
12641 
12642 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12643     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12644     SourceLocation Loc) {
12645   llvm_unreachable("Not supported in SIMD-only mode");
12646 }
12647 
12648 void CGOpenMPSIMDRuntime::emitSingleRegion(
12649     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12650     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12651     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12652     ArrayRef<const Expr *> AssignmentOps) {
12653   llvm_unreachable("Not supported in SIMD-only mode");
12654 }
12655 
12656 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12657                                             const RegionCodeGenTy &OrderedOpGen,
12658                                             SourceLocation Loc,
12659                                             bool IsThreads) {
12660   llvm_unreachable("Not supported in SIMD-only mode");
12661 }
12662 
12663 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12664                                           SourceLocation Loc,
12665                                           OpenMPDirectiveKind Kind,
12666                                           bool EmitChecks,
12667                                           bool ForceSimpleCall) {
12668   llvm_unreachable("Not supported in SIMD-only mode");
12669 }
12670 
12671 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12672     CodeGenFunction &CGF, SourceLocation Loc,
12673     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12674     bool Ordered, const DispatchRTInput &DispatchValues) {
12675   llvm_unreachable("Not supported in SIMD-only mode");
12676 }
12677 
12678 void CGOpenMPSIMDRuntime::emitForStaticInit(
12679     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12680     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12681   llvm_unreachable("Not supported in SIMD-only mode");
12682 }
12683 
12684 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12685     CodeGenFunction &CGF, SourceLocation Loc,
12686     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12687   llvm_unreachable("Not supported in SIMD-only mode");
12688 }
12689 
12690 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12691                                                      SourceLocation Loc,
12692                                                      unsigned IVSize,
12693                                                      bool IVSigned) {
12694   llvm_unreachable("Not supported in SIMD-only mode");
12695 }
12696 
12697 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12698                                               SourceLocation Loc,
12699                                               OpenMPDirectiveKind DKind) {
12700   llvm_unreachable("Not supported in SIMD-only mode");
12701 }
12702 
12703 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12704                                               SourceLocation Loc,
12705                                               unsigned IVSize, bool IVSigned,
12706                                               Address IL, Address LB,
12707                                               Address UB, Address ST) {
12708   llvm_unreachable("Not supported in SIMD-only mode");
12709 }
12710 
12711 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12712                                                llvm::Value *NumThreads,
12713                                                SourceLocation Loc) {
12714   llvm_unreachable("Not supported in SIMD-only mode");
12715 }
12716 
12717 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12718                                              ProcBindKind ProcBind,
12719                                              SourceLocation Loc) {
12720   llvm_unreachable("Not supported in SIMD-only mode");
12721 }
12722 
12723 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12724                                                     const VarDecl *VD,
12725                                                     Address VDAddr,
12726                                                     SourceLocation Loc) {
12727   llvm_unreachable("Not supported in SIMD-only mode");
12728 }
12729 
12730 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12731     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12732     CodeGenFunction *CGF) {
12733   llvm_unreachable("Not supported in SIMD-only mode");
12734 }
12735 
12736 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12737     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12738   llvm_unreachable("Not supported in SIMD-only mode");
12739 }
12740 
12741 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12742                                     ArrayRef<const Expr *> Vars,
12743                                     SourceLocation Loc,
12744                                     llvm::AtomicOrdering AO) {
12745   llvm_unreachable("Not supported in SIMD-only mode");
12746 }
12747 
12748 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12749                                        const OMPExecutableDirective &D,
12750                                        llvm::Function *TaskFunction,
12751                                        QualType SharedsTy, Address Shareds,
12752                                        const Expr *IfCond,
12753                                        const OMPTaskDataTy &Data) {
12754   llvm_unreachable("Not supported in SIMD-only mode");
12755 }
12756 
12757 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12758     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12759     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12760     const Expr *IfCond, const OMPTaskDataTy &Data) {
12761   llvm_unreachable("Not supported in SIMD-only mode");
12762 }
12763 
12764 void CGOpenMPSIMDRuntime::emitReduction(
12765     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12766     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12767     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12768   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12769   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12770                                  ReductionOps, Options);
12771 }
12772 
12773 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12774     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12775     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12776   llvm_unreachable("Not supported in SIMD-only mode");
12777 }
12778 
12779 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12780                                                 SourceLocation Loc,
12781                                                 bool IsWorksharingReduction) {
12782   llvm_unreachable("Not supported in SIMD-only mode");
12783 }
12784 
12785 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12786                                                   SourceLocation Loc,
12787                                                   ReductionCodeGen &RCG,
12788                                                   unsigned N) {
12789   llvm_unreachable("Not supported in SIMD-only mode");
12790 }
12791 
12792 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12793                                                   SourceLocation Loc,
12794                                                   llvm::Value *ReductionsPtr,
12795                                                   LValue SharedLVal) {
12796   llvm_unreachable("Not supported in SIMD-only mode");
12797 }
12798 
12799 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12800                                            SourceLocation Loc) {
12801   llvm_unreachable("Not supported in SIMD-only mode");
12802 }
12803 
12804 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12805     CodeGenFunction &CGF, SourceLocation Loc,
12806     OpenMPDirectiveKind CancelRegion) {
12807   llvm_unreachable("Not supported in SIMD-only mode");
12808 }
12809 
12810 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12811                                          SourceLocation Loc, const Expr *IfCond,
12812                                          OpenMPDirectiveKind CancelRegion) {
12813   llvm_unreachable("Not supported in SIMD-only mode");
12814 }
12815 
12816 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12817     const OMPExecutableDirective &D, StringRef ParentName,
12818     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12819     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12820   llvm_unreachable("Not supported in SIMD-only mode");
12821 }
12822 
12823 void CGOpenMPSIMDRuntime::emitTargetCall(
12824     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12825     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12826     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12827     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12828                                      const OMPLoopDirective &D)>
12829         SizeEmitter) {
12830   llvm_unreachable("Not supported in SIMD-only mode");
12831 }
12832 
/// Emitting target functions is not supported in SIMD-only mode; the function
/// is marked unreachable and never returns a value.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12836 
/// Emitting a target global variable is not supported in SIMD-only mode; the
/// function is marked unreachable and never returns a value.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12840 
/// SIMD-only mode version of emitTargetGlobal: ignores \p GD and always
/// returns false.
///
/// NOTE(review): presumably 'false' tells the caller that the declaration was
/// not handled as a target global -- confirm against the declaration in
/// CGOpenMPRuntime.h.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12844 
12845 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12846                                         const OMPExecutableDirective &D,
12847                                         SourceLocation Loc,
12848                                         llvm::Function *OutlinedFn,
12849                                         ArrayRef<llvm::Value *> CapturedVars) {
12850   llvm_unreachable("Not supported in SIMD-only mode");
12851 }
12852 
12853 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12854                                              const Expr *NumTeams,
12855                                              const Expr *ThreadLimit,
12856                                              SourceLocation Loc) {
12857   llvm_unreachable("Not supported in SIMD-only mode");
12858 }
12859 
12860 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12861     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12862     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12863   llvm_unreachable("Not supported in SIMD-only mode");
12864 }
12865 
12866 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12867     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12868     const Expr *Device) {
12869   llvm_unreachable("Not supported in SIMD-only mode");
12870 }
12871 
12872 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12873                                            const OMPLoopDirective &D,
12874                                            ArrayRef<Expr *> NumIterations) {
12875   llvm_unreachable("Not supported in SIMD-only mode");
12876 }
12877 
12878 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12879                                               const OMPDependClause *C) {
12880   llvm_unreachable("Not supported in SIMD-only mode");
12881 }
12882 
12883 const VarDecl *
12884 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12885                                         const VarDecl *NativeParam) const {
12886   llvm_unreachable("Not supported in SIMD-only mode");
12887 }
12888 
12889 Address
12890 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12891                                          const VarDecl *NativeParam,
12892                                          const VarDecl *TargetParam) const {
12893   llvm_unreachable("Not supported in SIMD-only mode");
12894 }
12895