1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder code generation callback for region infos that only evaluate
/// expressions. It must never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412   bool NoInheritance = false;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel,
421                           bool NoInheritance = true)
422       : CGF(CGF), NoInheritance(NoInheritance) {
423     // Start emission for the construct.
424     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
425         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
426     if (NoInheritance) {
427       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
429       CGF.LambdaThisCaptureField = nullptr;
430       BlockInfo = CGF.BlockInfo;
431       CGF.BlockInfo = nullptr;
432     }
433   }
434 
435   ~InlinedOpenMPRegionRAII() {
436     // Restore original CapturedStmtInfo only if we're done with code emission.
437     auto *OldCSI =
438         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
439     delete CGF.CapturedStmtInfo;
440     CGF.CapturedStmtInfo = OldCSI;
441     if (NoInheritance) {
442       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
443       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
444       CGF.BlockInfo = BlockInfo;
445     }
446   }
447 };
448 
449 /// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
451 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
452 enum OpenMPLocationFlags : unsigned {
453   /// Use trampoline for internal microtask.
454   OMP_IDENT_IMD = 0x01,
455   /// Use c-style ident structure.
456   OMP_IDENT_KMPC = 0x02,
457   /// Atomic reduction option for kmpc_reduce.
458   OMP_ATOMIC_REDUCE = 0x10,
459   /// Explicit 'barrier' directive.
460   OMP_IDENT_BARRIER_EXPL = 0x20,
461   /// Implicit barrier in code.
462   OMP_IDENT_BARRIER_IMPL = 0x40,
463   /// Implicit barrier in 'for' directive.
464   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
465   /// Implicit barrier in 'sections' directive.
466   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
467   /// Implicit barrier in 'single' directive.
468   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
469   /// Call of __kmp_for_static_init for static loop.
470   OMP_IDENT_WORK_LOOP = 0x200,
471   /// Call of __kmp_for_static_init for sections.
472   OMP_IDENT_WORK_SECTIONS = 0x400,
473   /// Call of __kmp_for_static_init for distribute.
474   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
475   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
476 };
477 
478 namespace {
479 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
480 /// Values for bit flags for marking which requires clauses have been used.
481 enum OpenMPOffloadingRequiresDirFlags : int64_t {
482   /// flag undefined.
483   OMP_REQ_UNDEFINED               = 0x000,
484   /// no requires clause present.
485   OMP_REQ_NONE                    = 0x001,
486   /// reverse_offload clause.
487   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
488   /// unified_address clause.
489   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
490   /// unified_shared_memory clause.
491   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
492   /// dynamic_allocators clause.
493   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
494   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
495 };
496 
497 enum OpenMPOffloadingReservedDeviceIDs {
498   /// Device ID if the device was not defined, runtime should get it
499   /// from environment variables in the spec.
500   OMP_DEVICEID_UNDEF = -1,
501 };
502 } // anonymous namespace
503 
504 /// Describes ident structure that describes a source location.
505 /// All descriptions are taken from
506 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
507 /// Original structure:
508 /// typedef struct ident {
509 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
510 ///                                  see above  */
511 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
512 ///                                  KMP_IDENT_KMPC identifies this union
513 ///                                  member  */
514 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
515 ///                                  see above */
516 ///#if USE_ITT_BUILD
517 ///                            /*  but currently used for storing
518 ///                                region-specific ITT */
519 ///                            /*  contextual information. */
520 ///#endif /* USE_ITT_BUILD */
521 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
522 ///                                 C++  */
523 ///    char const *psource;    /**< String describing the source location.
524 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
526 ///                            the function and a pair of line numbers that
527 ///                            delimit the construct.
528 ///                             */
529 /// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. It is composed of semi-colon
  /// separated fields which describe the source file, the function and a pair
  /// of line numbers that delimit the construct.
  IdentField_PSource = 4
};
544 
/// Schedule types for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// Lower bound for the default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for the 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// The default schedule is an alias of the static schedule.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
576 
577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
578 /// region.
579 class CleanupTy final : public EHScopeStack::Cleanup {
580   PrePostActionTy *Action;
581 
582 public:
583   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
584   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
585     if (!CGF.HaveInsertPoint())
586       return;
587     Action->Exit(CGF);
588   }
589 };
590 
591 } // anonymous namespace
592 
593 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
594   CodeGenFunction::RunCleanupsScope Scope(CGF);
595   if (PrePostAction) {
596     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
597     Callback(CodeGen, CGF, *PrePostAction);
598   } else {
599     PrePostActionTy Action;
600     Callback(CodeGen, CGF, Action);
601   }
602 }
603 
604 /// Check if the combiner is a call to UDR combiner and if it is so return the
605 /// UDR decl used for reduction.
606 static const OMPDeclareReductionDecl *
607 getReductionInit(const Expr *ReductionOp) {
608   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
609     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
610       if (const auto *DRE =
611               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
612         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
613           return DRD;
614   return nullptr;
615 }
616 
617 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
618                                              const OMPDeclareReductionDecl *DRD,
619                                              const Expr *InitOp,
620                                              Address Private, Address Original,
621                                              QualType Ty) {
622   if (DRD->getInitializer()) {
623     std::pair<llvm::Function *, llvm::Function *> Reduction =
624         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
625     const auto *CE = cast<CallExpr>(InitOp);
626     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
627     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
628     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
629     const auto *LHSDRE =
630         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
631     const auto *RHSDRE =
632         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
633     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
634     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
635                             [=]() { return Private; });
636     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
637                             [=]() { return Original; });
638     (void)PrivateScope.Privatize();
639     RValue Func = RValue::get(Reduction.second);
640     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
641     CGF.EmitIgnoredExpr(InitOp);
642   } else {
643     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
644     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
645     auto *GV = new llvm::GlobalVariable(
646         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
647         llvm::GlobalValue::PrivateLinkage, Init, Name);
648     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
649     RValue InitRVal;
650     switch (CGF.getEvaluationKind(Ty)) {
651     case TEK_Scalar:
652       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
653       break;
654     case TEK_Complex:
655       InitRVal =
656           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
657       break;
658     case TEK_Aggregate:
659       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
660       break;
661     }
662     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
663     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
664     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
665                          /*IsInitializer=*/false);
666   }
667 }
668 
669 /// Emit initialization of arrays of complex types.
670 /// \param DestAddr Address of the array.
671 /// \param Type Type of array.
672 /// \param Init Initial expression of array.
673 /// \param SrcAddr Address of the original array.
674 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
675                                  QualType Type, bool EmitDeclareReductionInit,
676                                  const Expr *Init,
677                                  const OMPDeclareReductionDecl *DRD,
678                                  Address SrcAddr = Address::invalid()) {
679   // Perform element-by-element initialization.
680   QualType ElementTy;
681 
682   // Drill down to the base element type on both arrays.
683   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
684   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
685   DestAddr =
686       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
687   if (DRD)
688     SrcAddr =
689         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
690 
691   llvm::Value *SrcBegin = nullptr;
692   if (DRD)
693     SrcBegin = SrcAddr.getPointer();
694   llvm::Value *DestBegin = DestAddr.getPointer();
695   // Cast from pointer to array type to pointer to single element.
696   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
697   // The basic structure here is a while-do loop.
698   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
699   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
700   llvm::Value *IsEmpty =
701       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
702   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
703 
704   // Enter the loop body, making that address the current address.
705   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
706   CGF.EmitBlock(BodyBB);
707 
708   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
709 
710   llvm::PHINode *SrcElementPHI = nullptr;
711   Address SrcElementCurrent = Address::invalid();
712   if (DRD) {
713     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
714                                           "omp.arraycpy.srcElementPast");
715     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
716     SrcElementCurrent =
717         Address(SrcElementPHI,
718                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
719   }
720   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
721       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
722   DestElementPHI->addIncoming(DestBegin, EntryBB);
723   Address DestElementCurrent =
724       Address(DestElementPHI,
725               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
726 
727   // Emit copy.
728   {
729     CodeGenFunction::RunCleanupsScope InitScope(CGF);
730     if (EmitDeclareReductionInit) {
731       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
732                                        SrcElementCurrent, ElementTy);
733     } else
734       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
735                            /*IsInitializer=*/false);
736   }
737 
738   if (DRD) {
739     // Shift the address forward by one element.
740     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
741         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
743   }
744 
745   // Shift the address forward by one element.
746   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
747       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
748   // Check whether we've reached the end.
749   llvm::Value *Done =
750       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
751   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
752   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
753 
754   // Done.
755   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
756 }
757 
758 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
759   return CGF.EmitOMPSharedLValue(E);
760 }
761 
762 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
763                                             const Expr *E) {
764   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
765     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
766   return LValue();
767 }
768 
769 void ReductionCodeGen::emitAggregateInitialization(
770     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
771     const OMPDeclareReductionDecl *DRD) {
772   // Emit VarDecl with copy init for arrays.
773   // Get the address of the original variable captured in current
774   // captured region.
775   const auto *PrivateVD =
776       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
777   bool EmitDeclareReductionInit =
778       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
779   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
780                        EmitDeclareReductionInit,
781                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
782                                                 : PrivateVD->getInit(),
783                        DRD, SharedLVal.getAddress(CGF));
784 }
785 
786 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
787                                    ArrayRef<const Expr *> Origs,
788                                    ArrayRef<const Expr *> Privates,
789                                    ArrayRef<const Expr *> ReductionOps) {
790   ClausesData.reserve(Shareds.size());
791   SharedAddresses.reserve(Shareds.size());
792   Sizes.reserve(Shareds.size());
793   BaseDecls.reserve(Shareds.size());
794   const auto *IOrig = Origs.begin();
795   const auto *IPriv = Privates.begin();
796   const auto *IRed = ReductionOps.begin();
797   for (const Expr *Ref : Shareds) {
798     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
799     std::advance(IOrig, 1);
800     std::advance(IPriv, 1);
801     std::advance(IRed, 1);
802   }
803 }
804 
805 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
806   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
807          "Number of generated lvalues must be exactly N.");
808   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
809   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
810   SharedAddresses.emplace_back(First, Second);
811   if (ClausesData[N].Shared == ClausesData[N].Ref) {
812     OrigAddresses.emplace_back(First, Second);
813   } else {
814     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
815     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
816     OrigAddresses.emplace_back(First, Second);
817   }
818 }
819 
820 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
821   const auto *PrivateVD =
822       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
823   QualType PrivateType = PrivateVD->getType();
824   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
825   if (!PrivateType->isVariablyModifiedType()) {
826     Sizes.emplace_back(
827         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
828         nullptr);
829     return;
830   }
831   llvm::Value *Size;
832   llvm::Value *SizeInChars;
833   auto *ElemType =
834       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
835           ->getElementType();
836   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
837   if (AsArraySection) {
838     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
839                                      OrigAddresses[N].first.getPointer(CGF));
840     Size = CGF.Builder.CreateNUWAdd(
841         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
842     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
843   } else {
844     SizeInChars =
845         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
846     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
847   }
848   Sizes.emplace_back(SizeInChars, Size);
849   CodeGenFunction::OpaqueValueMapping OpaqueMap(
850       CGF,
851       cast<OpaqueValueExpr>(
852           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
853       RValue::get(Size));
854   CGF.EmitVariablyModifiedType(PrivateType);
855 }
856 
857 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
858                                          llvm::Value *Size) {
859   const auto *PrivateVD =
860       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
861   QualType PrivateType = PrivateVD->getType();
862   if (!PrivateType->isVariablyModifiedType()) {
863     assert(!Size && !Sizes[N].second &&
864            "Size should be nullptr for non-variably modified reduction "
865            "items.");
866     return;
867   }
868   CodeGenFunction::OpaqueValueMapping OpaqueMap(
869       CGF,
870       cast<OpaqueValueExpr>(
871           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
872       RValue::get(Size));
873   CGF.EmitVariablyModifiedType(PrivateType);
874 }
875 
876 void ReductionCodeGen::emitInitialization(
877     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
878     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
879   assert(SharedAddresses.size() > N && "No variable was generated");
880   const auto *PrivateVD =
881       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
882   const OMPDeclareReductionDecl *DRD =
883       getReductionInit(ClausesData[N].ReductionOp);
884   QualType PrivateType = PrivateVD->getType();
885   PrivateAddr = CGF.Builder.CreateElementBitCast(
886       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
887   QualType SharedType = SharedAddresses[N].first.getType();
888   SharedLVal = CGF.MakeAddrLValue(
889       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
890                                        CGF.ConvertTypeForMem(SharedType)),
891       SharedType, SharedAddresses[N].first.getBaseInfo(),
892       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
893   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
894     if (DRD && DRD->getInitializer())
895       (void)DefaultInit(CGF);
896     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
897   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
898     (void)DefaultInit(CGF);
899     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
900                                      PrivateAddr, SharedLVal.getAddress(CGF),
901                                      SharedLVal.getType());
902   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
903              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
904     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
905                          PrivateVD->getType().getQualifiers(),
906                          /*IsInitializer=*/false);
907   }
908 }
909 
910 bool ReductionCodeGen::needCleanups(unsigned N) {
911   const auto *PrivateVD =
912       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
913   QualType PrivateType = PrivateVD->getType();
914   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
915   return DTorKind != QualType::DK_none;
916 }
917 
918 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
919                                     Address PrivateAddr) {
920   const auto *PrivateVD =
921       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922   QualType PrivateType = PrivateVD->getType();
923   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924   if (needCleanups(N)) {
925     PrivateAddr = CGF.Builder.CreateElementBitCast(
926         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
927     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
928   }
929 }
930 
931 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
932                           LValue BaseLV) {
933   BaseTy = BaseTy.getNonReferenceType();
934   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
935          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
936     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
937       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
938     } else {
939       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
940       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
941     }
942     BaseTy = BaseTy->getPointeeType();
943   }
944   return CGF.MakeAddrLValue(
945       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
946                                        CGF.ConvertTypeForMem(ElTy)),
947       BaseLV.getType(), BaseLV.getBaseInfo(),
948       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
949 }
950 
951 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
952                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
953                           llvm::Value *Addr) {
954   Address Tmp = Address::invalid();
955   Address TopTmp = Address::invalid();
956   Address MostTopTmp = Address::invalid();
957   BaseTy = BaseTy.getNonReferenceType();
958   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
959          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
960     Tmp = CGF.CreateMemTemp(BaseTy);
961     if (TopTmp.isValid())
962       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
963     else
964       MostTopTmp = Tmp;
965     TopTmp = Tmp;
966     BaseTy = BaseTy->getPointeeType();
967   }
968   llvm::Type *Ty = BaseLVType;
969   if (Tmp.isValid())
970     Ty = Tmp.getElementType();
971   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
972   if (Tmp.isValid()) {
973     CGF.Builder.CreateStore(Addr, Tmp);
974     return MostTopTmp;
975   }
976   return Address(Addr, BaseLVAlignment);
977 }
978 
979 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
980   const VarDecl *OrigVD = nullptr;
981   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
982     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
984       Base = TempOASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
990     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
992       Base = TempASE->getBase()->IgnoreParenImpCasts();
993     DE = cast<DeclRefExpr>(Base);
994     OrigVD = cast<VarDecl>(DE->getDecl());
995   }
996   return OrigVD;
997 }
998 
999 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1000                                                Address PrivateAddr) {
1001   const DeclRefExpr *DE;
1002   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1003     BaseDecls.emplace_back(OrigVD);
1004     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1005     LValue BaseLValue =
1006         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1007                     OriginalBaseLValue);
1008     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1009         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1010     llvm::Value *PrivatePointer =
1011         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1012             PrivateAddr.getPointer(),
1013             SharedAddresses[N].first.getAddress(CGF).getType());
1014     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1015     return castToBase(CGF, OrigVD->getType(),
1016                       SharedAddresses[N].first.getType(),
1017                       OriginalBaseLValue.getAddress(CGF).getType(),
1018                       OriginalBaseLValue.getAlignment(), Ptr);
1019   }
1020   BaseDecls.emplace_back(
1021       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1022   return PrivateAddr;
1023 }
1024 
1025 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1026   const OMPDeclareReductionDecl *DRD =
1027       getReductionInit(ClausesData[N].ReductionOp);
1028   return DRD && DRD->getInitializer();
1029 }
1030 
1031 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1032   return CGF.EmitLoadOfPointerLValue(
1033       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1034       getThreadIDVariable()->getType()->castAs<PointerType>());
1035 }
1036 
1037 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1038   if (!CGF.HaveInsertPoint())
1039     return;
1040   // 1.2.2 OpenMP Language Terminology
1041   // Structured block - An executable statement with a single entry at the
1042   // top and a single exit at the bottom.
1043   // The point of exit cannot be a branch out of the structured block.
1044   // longjmp() and throw() must not violate the entry/exit criteria.
1045   CGF.EHStack.pushTerminate();
1046   CodeGen(CGF);
1047   CGF.EHStack.popTerminate();
1048 }
1049 
1050 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1051     CodeGenFunction &CGF) {
1052   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1053                             getThreadIDVariable()->getType(),
1054                             AlignmentSource::Decl);
1055 }
1056 
1057 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1058                                        QualType FieldTy) {
1059   auto *Field = FieldDecl::Create(
1060       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1061       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1062       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1063   Field->setAccess(AS_public);
1064   DC->addDecl(Field);
1065   return Field;
1066 }
1067 
1068 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1069                                  StringRef Separator)
1070     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1071       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1072   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1073 
1074   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1075   OMPBuilder.initialize();
1076   loadOffloadInfoMetadata();
1077 }
1078 
1079 void CGOpenMPRuntime::clear() {
1080   InternalVars.clear();
1081   // Clean non-target variable declarations possibly used only in debug info.
1082   for (const auto &Data : EmittedNonTargetVariables) {
1083     if (!Data.getValue().pointsToAliveValue())
1084       continue;
1085     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1086     if (!GV)
1087       continue;
1088     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1089       continue;
1090     GV->eraseFromParent();
1091   }
1092 }
1093 
1094 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1095   SmallString<128> Buffer;
1096   llvm::raw_svector_ostream OS(Buffer);
1097   StringRef Sep = FirstSeparator;
1098   for (StringRef Part : Parts) {
1099     OS << Sep << Part;
1100     Sep = Separator;
1101   }
1102   return std::string(OS.str());
1103 }
1104 
1105 static llvm::Function *
1106 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1107                           const Expr *CombinerInitializer, const VarDecl *In,
1108                           const VarDecl *Out, bool IsCombiner) {
1109   // void .omp_combiner.(Ty *in, Ty *out);
1110   ASTContext &C = CGM.getContext();
1111   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1112   FunctionArgList Args;
1113   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1114                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1115   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1116                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1117   Args.push_back(&OmpOutParm);
1118   Args.push_back(&OmpInParm);
1119   const CGFunctionInfo &FnInfo =
1120       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1121   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1122   std::string Name = CGM.getOpenMPRuntime().getName(
1123       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1124   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1125                                     Name, &CGM.getModule());
1126   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1127   if (CGM.getLangOpts().Optimize) {
1128     Fn->removeFnAttr(llvm::Attribute::NoInline);
1129     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1130     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1131   }
1132   CodeGenFunction CGF(CGM);
1133   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1134   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1135   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1136                     Out->getLocation());
1137   CodeGenFunction::OMPPrivateScope Scope(CGF);
1138   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1139   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1140     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1141         .getAddress(CGF);
1142   });
1143   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1144   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1145     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1146         .getAddress(CGF);
1147   });
1148   (void)Scope.Privatize();
1149   if (!IsCombiner && Out->hasInit() &&
1150       !CGF.isTrivialInitializer(Out->getInit())) {
1151     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1152                          Out->getType().getQualifiers(),
1153                          /*IsInitializer=*/true);
1154   }
1155   if (CombinerInitializer)
1156     CGF.EmitIgnoredExpr(CombinerInitializer);
1157   Scope.ForceCleanup();
1158   CGF.FinishFunction();
1159   return Fn;
1160 }
1161 
1162 void CGOpenMPRuntime::emitUserDefinedReduction(
1163     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1164   if (UDRMap.count(D) > 0)
1165     return;
1166   llvm::Function *Combiner = emitCombinerOrInitializer(
1167       CGM, D->getType(), D->getCombiner(),
1168       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1169       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1170       /*IsCombiner=*/true);
1171   llvm::Function *Initializer = nullptr;
1172   if (const Expr *Init = D->getInitializer()) {
1173     Initializer = emitCombinerOrInitializer(
1174         CGM, D->getType(),
1175         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1176                                                                      : nullptr,
1177         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1178         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1179         /*IsCombiner=*/false);
1180   }
1181   UDRMap.try_emplace(D, Combiner, Initializer);
1182   if (CGF) {
1183     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1184     Decls.second.push_back(D);
1185   }
1186 }
1187 
1188 std::pair<llvm::Function *, llvm::Function *>
1189 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1190   auto I = UDRMap.find(D);
1191   if (I != UDRMap.end())
1192     return I->second;
1193   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1194   return UDRMap.lookup(D);
1195 }
1196 
1197 namespace {
1198 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1199 // Builder if one is present.
1200 struct PushAndPopStackRAII {
1201   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1202                       bool HasCancel)
1203       : OMPBuilder(OMPBuilder) {
1204     if (!OMPBuilder)
1205       return;
1206 
1207     // The following callback is the crucial part of clangs cleanup process.
1208     //
1209     // NOTE:
1210     // Once the OpenMPIRBuilder is used to create parallel regions (and
1211     // similar), the cancellation destination (Dest below) is determined via
1212     // IP. That means if we have variables to finalize we split the block at IP,
1213     // use the new block (=BB) as destination to build a JumpDest (via
1214     // getJumpDestInCurrentScope(BB)) which then is fed to
1215     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1216     // to push & pop an FinalizationInfo object.
1217     // The FiniCB will still be needed but at the point where the
1218     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1219     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1220       assert(IP.getBlock()->end() == IP.getPoint() &&
1221              "Clang CG should cause non-terminated block!");
1222       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1223       CGF.Builder.restoreIP(IP);
1224       CodeGenFunction::JumpDest Dest =
1225           CGF.getOMPCancelDestination(OMPD_parallel);
1226       CGF.EmitBranchThroughCleanup(Dest);
1227     };
1228 
1229     // TODO: Remove this once we emit parallel regions through the
1230     //       OpenMPIRBuilder as it can do this setup internally.
1231     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1232         {FiniCB, OMPD_parallel, HasCancel});
1233     OMPBuilder->pushFinalizationCB(std::move(FI));
1234   }
1235   ~PushAndPopStackRAII() {
1236     if (OMPBuilder)
1237       OMPBuilder->popFinalizationCB();
1238   }
1239   llvm::OpenMPIRBuilder *OMPBuilder;
1240 };
1241 } // namespace
1242 
1243 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1244     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1245     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1246     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1247   assert(ThreadIDVar->getType()->isPointerType() &&
1248          "thread id variable must be of type kmp_int32 *");
1249   CodeGenFunction CGF(CGM, true);
1250   bool HasCancel = false;
1251   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1252     HasCancel = OPD->hasCancel();
1253   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1254     HasCancel = OPD->hasCancel();
1255   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1256     HasCancel = OPSD->hasCancel();
1257   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1258     HasCancel = OPFD->hasCancel();
1259   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1260     HasCancel = OPFD->hasCancel();
1261   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263   else if (const auto *OPFD =
1264                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1265     HasCancel = OPFD->hasCancel();
1266   else if (const auto *OPFD =
1267                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269 
1270   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1271   //       parallel region to make cancellation barriers work properly.
1272   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1273   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1274   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1275                                     HasCancel, OutlinedHelperName);
1276   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1277   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1278 }
1279 
1280 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1281     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1282     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1283   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1284   return emitParallelOrTeamsOutlinedFunction(
1285       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1286 }
1287 
1288 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1289     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1290     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1291   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1292   return emitParallelOrTeamsOutlinedFunction(
1293       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1294 }
1295 
1296 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1297     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1298     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1299     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1300     bool Tied, unsigned &NumberOfParts) {
1301   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1302                                               PrePostActionTy &) {
1303     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1304     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1305     llvm::Value *TaskArgs[] = {
1306         UpLoc, ThreadID,
1307         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1308                                     TaskTVar->getType()->castAs<PointerType>())
1309             .getPointer(CGF)};
1310     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1311                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1312                         TaskArgs);
1313   };
1314   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1315                                                             UntiedCodeGen);
1316   CodeGen.setAction(Action);
1317   assert(!ThreadIDVar->getType()->isPointerType() &&
1318          "thread id variable must be of type kmp_int32 for tasks");
1319   const OpenMPDirectiveKind Region =
1320       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1321                                                       : OMPD_task;
1322   const CapturedStmt *CS = D.getCapturedStmt(Region);
1323   bool HasCancel = false;
1324   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1325     HasCancel = TD->hasCancel();
1326   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1327     HasCancel = TD->hasCancel();
1328   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1329     HasCancel = TD->hasCancel();
1330   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1331     HasCancel = TD->hasCancel();
1332 
1333   CodeGenFunction CGF(CGM, true);
1334   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1335                                         InnermostKind, HasCancel, Action);
1336   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1337   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1338   if (!Tied)
1339     NumberOfParts = Action.getNumberOfParts();
1340   return Res;
1341 }
1342 
1343 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1344                              const RecordDecl *RD, const CGRecordLayout &RL,
1345                              ArrayRef<llvm::Constant *> Data) {
1346   llvm::StructType *StructTy = RL.getLLVMType();
1347   unsigned PrevIdx = 0;
1348   ConstantInitBuilder CIBuilder(CGM);
1349   auto DI = Data.begin();
1350   for (const FieldDecl *FD : RD->fields()) {
1351     unsigned Idx = RL.getLLVMFieldNo(FD);
1352     // Fill the alignment.
1353     for (unsigned I = PrevIdx; I < Idx; ++I)
1354       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1355     PrevIdx = Idx + 1;
1356     Fields.add(*DI);
1357     ++DI;
1358   }
1359 }
1360 
1361 template <class... As>
1362 static llvm::GlobalVariable *
1363 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1364                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1365                    As &&... Args) {
1366   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1367   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1368   ConstantInitBuilder CIBuilder(CGM);
1369   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1370   buildStructValue(Fields, CGM, RD, RL, Data);
1371   return Fields.finishAndCreateGlobal(
1372       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1373       std::forward<As>(Args)...);
1374 }
1375 
1376 template <typename T>
1377 static void
1378 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1379                                          ArrayRef<llvm::Constant *> Data,
1380                                          T &Parent) {
1381   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1382   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1383   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1384   buildStructValue(Fields, CGM, RD, RL, Data);
1385   Fields.finishAndAddTo(Parent);
1386 }
1387 
1388 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1389                                              bool AtCurrentPoint) {
1390   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1391   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1392 
1393   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1394   if (AtCurrentPoint) {
1395     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1396         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1397   } else {
1398     Elem.second.ServiceInsertPt =
1399         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1400     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1401   }
1402 }
1403 
1404 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1405   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1406   if (Elem.second.ServiceInsertPt) {
1407     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1408     Elem.second.ServiceInsertPt = nullptr;
1409     Ptr->eraseFromParent();
1410   }
1411 }
1412 
1413 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1414                                                   SourceLocation Loc,
1415                                                   SmallString<128> &Buffer) {
1416   llvm::raw_svector_ostream OS(Buffer);
1417   // Build debug location
1418   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1419   OS << ";" << PLoc.getFilename() << ";";
1420   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1421     OS << FD->getQualifiedNameAsString();
1422   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1423   return OS.str();
1424 }
1425 
1426 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1427                                                  SourceLocation Loc,
1428                                                  unsigned Flags) {
1429   llvm::Constant *SrcLocStr;
1430   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1431       Loc.isInvalid()) {
1432     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1433   } else {
1434     std::string FunctionName = "";
1435     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1436       FunctionName = FD->getQualifiedNameAsString();
1437     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1438     const char *FileName = PLoc.getFilename();
1439     unsigned Line = PLoc.getLine();
1440     unsigned Column = PLoc.getColumn();
1441     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1442                                                 Line, Column);
1443   }
1444   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1445   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1446                                      Reserved2Flags);
1447 }
1448 
/// Returns the value holding the OpenMP global thread id for the function
/// currently being emitted by \p CGF.
///
/// The value is obtained, in order of preference, from:
///  1. the OpenMPIRBuilder, when the OpenMPIRBuilder language option is set;
///  2. the per-function cache in OpenMPLocThreadIDMap;
///  3. a load of the thread id variable of the enclosing OpenMP region, when
///     the checks below allow it;
///  4. a call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is then cached for the rest of the function.
///
/// \param Loc Source location used for the emitted load and for the ident
/// argument of the runtime call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  // NOTE(review): unlike emitUpdateLocation, this path does not check Loc for
  // validity before asking for its presumed location - confirm callers never
  // reach it with an invalid location.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load the thread id from the region's variable if any of these holds:
      // no landing pad is required, C++ exceptions are not enabled, we are
      // emitting into the block that holds the alloca insert point, or the
      // variable's pointer is not an instruction or is an instruction located
      // in that top block or in the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insert point placeholder for this function.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point rather than at the current
  // position; the guard is expected to put the builder back afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1516 
1517 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1518   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1519   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1520     clearLocThreadIdInsertPt(CGF);
1521     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1522   }
1523   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1524     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1525       UDRMap.erase(D);
1526     FunctionUDRMap.erase(CGF.CurFn);
1527   }
1528   auto I = FunctionUDMMap.find(CGF.CurFn);
1529   if (I != FunctionUDMMap.end()) {
1530     for(const auto *D : I->second)
1531       UDMMap.erase(D);
1532     FunctionUDMMap.erase(I);
1533   }
1534   LastprivateConditionalToTypes.erase(CGF.CurFn);
1535   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1536 }
1537 
1538 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1539   return OMPBuilder.IdentPtr;
1540 }
1541 
1542 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1543   if (!Kmpc_MicroTy) {
1544     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1545     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1546                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1547     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1548   }
1549   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1550 }
1551 
1552 llvm::FunctionCallee
1553 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1554   assert((IVSize == 32 || IVSize == 64) &&
1555          "IV size is not compatible with the omp runtime");
1556   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1557                                             : "__kmpc_for_static_init_4u")
1558                                 : (IVSigned ? "__kmpc_for_static_init_8"
1559                                             : "__kmpc_for_static_init_8u");
1560   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1561   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1562   llvm::Type *TypeParams[] = {
1563     getIdentTyPointerTy(),                     // loc
1564     CGM.Int32Ty,                               // tid
1565     CGM.Int32Ty,                               // schedtype
1566     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1567     PtrTy,                                     // p_lower
1568     PtrTy,                                     // p_upper
1569     PtrTy,                                     // p_stride
1570     ITy,                                       // incr
1571     ITy                                        // chunk
1572   };
1573   auto *FnTy =
1574       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1575   return CGM.CreateRuntimeFunction(FnTy, Name);
1576 }
1577 
1578 llvm::FunctionCallee
1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1580   assert((IVSize == 32 || IVSize == 64) &&
1581          "IV size is not compatible with the omp runtime");
1582   StringRef Name =
1583       IVSize == 32
1584           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1585           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1586   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1587   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1588                                CGM.Int32Ty,           // tid
1589                                CGM.Int32Ty,           // schedtype
1590                                ITy,                   // lower
1591                                ITy,                   // upper
1592                                ITy,                   // stride
1593                                ITy                    // chunk
1594   };
1595   auto *FnTy =
1596       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1597   return CGM.CreateRuntimeFunction(FnTy, Name);
1598 }
1599 
1600 llvm::FunctionCallee
1601 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1602   assert((IVSize == 32 || IVSize == 64) &&
1603          "IV size is not compatible with the omp runtime");
1604   StringRef Name =
1605       IVSize == 32
1606           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1607           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1608   llvm::Type *TypeParams[] = {
1609       getIdentTyPointerTy(), // loc
1610       CGM.Int32Ty,           // tid
1611   };
1612   auto *FnTy =
1613       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1614   return CGM.CreateRuntimeFunction(FnTy, Name);
1615 }
1616 
1617 llvm::FunctionCallee
1618 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1619   assert((IVSize == 32 || IVSize == 64) &&
1620          "IV size is not compatible with the omp runtime");
1621   StringRef Name =
1622       IVSize == 32
1623           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1624           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1625   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1626   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1627   llvm::Type *TypeParams[] = {
1628     getIdentTyPointerTy(),                     // loc
1629     CGM.Int32Ty,                               // tid
1630     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1631     PtrTy,                                     // p_lower
1632     PtrTy,                                     // p_upper
1633     PtrTy                                      // p_stride
1634   };
1635   auto *FnTy =
1636       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1637   return CGM.CreateRuntimeFunction(FnTy, Name);
1638 }
1639 
1640 /// Obtain information that uniquely identifies a target entry. This
1641 /// consists of the file and device IDs as well as line number associated with
1642 /// the relevant entry source location.
1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1644                                      unsigned &DeviceID, unsigned &FileID,
1645                                      unsigned &LineNum) {
1646   SourceManager &SM = C.getSourceManager();
1647 
1648   // The loc should be always valid and have a file ID (the user cannot use
1649   // #pragma directives in macros)
1650 
1651   assert(Loc.isValid() && "Source location is expected to be always valid.");
1652 
1653   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1654   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1655 
1656   llvm::sys::fs::UniqueID ID;
1657   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1658     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1659     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1660     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1661       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1662           << PLoc.getFilename() << EC.message();
1663   }
1664 
1665   DeviceID = ID.getDevice();
1666   FileID = ID.getFile();
1667   LineNum = PLoc.getLine();
1668 }
1669 
1670 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1671   if (CGM.getLangOpts().OpenMPSimd)
1672     return Address::invalid();
1673   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1674       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1675   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1676               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1677                HasRequiresUnifiedSharedMemory))) {
1678     SmallString<64> PtrName;
1679     {
1680       llvm::raw_svector_ostream OS(PtrName);
1681       OS << CGM.getMangledName(GlobalDecl(VD));
1682       if (!VD->isExternallyVisible()) {
1683         unsigned DeviceID, FileID, Line;
1684         getTargetEntryUniqueInfo(CGM.getContext(),
1685                                  VD->getCanonicalDecl()->getBeginLoc(),
1686                                  DeviceID, FileID, Line);
1687         OS << llvm::format("_%x", FileID);
1688       }
1689       OS << "_decl_tgt_ref_ptr";
1690     }
1691     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1692     if (!Ptr) {
1693       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1694       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1695                                         PtrName);
1696 
1697       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1698       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1699 
1700       if (!CGM.getLangOpts().OpenMPIsDevice)
1701         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1702       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1703     }
1704     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1705   }
1706   return Address::invalid();
1707 }
1708 
1709 llvm::Constant *
1710 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1711   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1712          !CGM.getContext().getTargetInfo().isTLSSupported());
1713   // Lookup the entry, lazily creating it if necessary.
1714   std::string Suffix = getName({"cache", ""});
1715   return getOrCreateInternalVariable(
1716       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1717 }
1718 
1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1720                                                 const VarDecl *VD,
1721                                                 Address VDAddr,
1722                                                 SourceLocation Loc) {
1723   if (CGM.getLangOpts().OpenMPUseTLS &&
1724       CGM.getContext().getTargetInfo().isTLSSupported())
1725     return VDAddr;
1726 
1727   llvm::Type *VarTy = VDAddr.getElementType();
1728   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1729                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1730                                                        CGM.Int8PtrTy),
1731                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1732                          getOrCreateThreadPrivateCache(VD)};
1733   return Address(CGF.EmitRuntimeCall(
1734                      OMPBuilder.getOrCreateRuntimeFunction(
1735                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1736                      Args),
1737                  VDAddr.getAlignment());
1738 }
1739 
1740 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1741     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1742     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1743   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1744   // library.
1745   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1746   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1747                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1748                       OMPLoc);
1749   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1750   // to register constructor/destructor for variable.
1751   llvm::Value *Args[] = {
1752       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1753       Ctor, CopyCtor, Dtor};
1754   CGF.EmitRuntimeCall(
1755       OMPBuilder.getOrCreateRuntimeFunction(
1756           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1757       Args);
1758 }
1759 
/// Emits the helper functions needed to construct and destroy the per-thread
/// copies of the threadprivate variable \p VD and registers them with the
/// runtime.
///
/// Nothing is emitted and nullptr is returned when TLS is requested and
/// supported by the target, when \p VD has no definition, when the definition
/// was already handled (tracked by mangled name in
/// ThreadPrivateWithDefinition), or when neither a constructor nor a
/// destructor helper is required.
///
/// \param VDAddr Address of the original variable; it is passed to the
/// registration call and supplies the alignment assumed for the helpers'
/// pointer argument.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit When true (and compiling C++), a constructor helper that
/// re-emits the variable's initializer is generated.
/// \param CGF When non-null, the registration calls are emitted into this
/// function and nullptr is returned. When null, a dedicated initialization
/// function is created, filled with the registration calls and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each definition only once; insert() reports whether the mangled
  // name was newly added.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check; this
    // presumably relies on PerformInit only being set when an initializer
    // exists - confirm against callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the destination copy to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given: reload the argument and
      // store it into the return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the copy to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed to the runtime as a null
    // function pointer of the matching type.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration in its own
    // nullary void function and hand that back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1879 
/// Emits and registers the offload entries for the initializer and the
/// destructor of the declare target variable \p VD.
///
/// On the device side real "_ctor"/"_dtor" functions operating on \p Addr are
/// generated and marked as used; on the host side one-byte private constant
/// globals with the same names are created instead. Either way the entry is
/// registered with OffloadEntriesInfoManager.
///
/// \param Addr Global variable that the generated ctor/dtor operate on.
/// \param PerformInit When true (and compiling C++), the constructor entry is
/// produced.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of the OpenMPIsDevice option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for variables that are not declare target, are 'link', or
  // are 'to' under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each definition only once, keyed by its mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common name prefix; Out is scratch storage for the full
  // "_ctor"/"_dtor" entry names.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the device global Addr of the declare target variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: no code is generated, only a one-byte constant global that
      // stands for the constructor entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the device global
      // Addr of the declare target variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a one-byte constant global stands for the destructor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1994 
1995 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1996                                                           QualType VarType,
1997                                                           StringRef Name) {
1998   std::string Suffix = getName({"artificial", ""});
1999   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2000   llvm::Value *GAddr =
2001       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2002   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2003       CGM.getTarget().isTLSSupported()) {
2004     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2005     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2006   }
2007   std::string CacheSuffix = getName({"cache", ""});
2008   llvm::Value *Args[] = {
2009       emitUpdateLocation(CGF, SourceLocation()),
2010       getThreadID(CGF, SourceLocation()),
2011       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2012       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2013                                 /*isSigned=*/false),
2014       getOrCreateInternalVariable(
2015           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2016   return Address(
2017       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2018           CGF.EmitRuntimeCall(
2019               OMPBuilder.getOrCreateRuntimeFunction(
2020                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2021               Args),
2022           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2023       CGM.getContext().getTypeAlignInChars(VarType));
2024 }
2025 
/// Emit the code selected by the condition of an OpenMP 'if' clause.
///
/// When \p Cond constant-folds, only the matching generator is invoked and no
/// branch is emitted. Otherwise a conditional branch is emitted with
/// \p ThenGen in the "omp_if.then" block, \p ElseGen in the "omp_if.else"
/// block, and both arms joining at "omp_if.end".
///
/// \param CGF     Function being emitted into.
/// \param Cond    Condition expression of the 'if' clause.
/// \param ThenGen Generator invoked for the true arm.
/// \param ElseGen Generator invoked for the false arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded at the end
  // of this statement. If ApplyDebugLocation restores the previous location
  // when destroyed, this has no lasting effect on the emission that follows;
  // confirm whether a scoped local was intended here and below.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2064 
/// Emit the invocation of an outlined 'parallel' region.
///
/// Without an 'if' clause (or when it selects the true arm) the outlined
/// function is handed to __kmpc_fork_call. On the false arm the outlined
/// function is called directly, bracketed by __kmpc_serialized_parallel and
/// __kmpc_end_serialized_parallel.
///
/// \param CGF          Function being emitted into.
/// \param Loc          Source location used for the runtime location argument
///                     and the thread-ID lookups.
/// \param OutlinedFn   Outlined function implementing the parallel region.
/// \param CapturedVars Values forwarded to the outlined function.
/// \param IfCond       Condition of the 'if' clause, or null if absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location value is computed once here and shared by both arms.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // True arm: fork the region through the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The fixed arguments are followed by the captured variables themselves.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // False arm: run the region serialized on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // Pass the thread-ID address and the address of the zero-initialized
    // bound temporary, followed by the captured variables.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always take the forking path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2134 
2135 // If we're inside an (outlined) parallel region, use the region info's
2136 // thread-ID variable (it is passed in a first argument of the outlined function
2137 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2138 // regular serial code region, get thread ID by calling kmp_int32
2139 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2140 // return the address of that temp.
2141 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2142                                              SourceLocation Loc) {
2143   if (auto *OMPRegionInfo =
2144           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2145     if (OMPRegionInfo->getThreadIDVariable())
2146       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2147 
2148   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2149   QualType Int32Ty =
2150       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2151   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2152   CGF.EmitStoreOfScalar(ThreadID,
2153                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2154 
2155   return ThreadIDTemp;
2156 }
2157 
2158 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2159     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2160   SmallString<256> Buffer;
2161   llvm::raw_svector_ostream Out(Buffer);
2162   Out << Name;
2163   StringRef RuntimeName = Out.str();
2164   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2165   if (Elem.second) {
2166     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2167            "OMP internal variable has different type than requested");
2168     return &*Elem.second;
2169   }
2170 
2171   return Elem.second = new llvm::GlobalVariable(
2172              CGM.getModule(), Ty, /*IsConstant*/ false,
2173              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2174              Elem.first(), /*InsertBefore=*/nullptr,
2175              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2176 }
2177 
2178 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2179   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2180   std::string Name = getName({Prefix, "var"});
2181   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2182 }
2183 
2184 namespace {
2185 /// Common pre(post)-action for different OpenMP constructs.
2186 class CommonActionTy final : public PrePostActionTy {
2187   llvm::FunctionCallee EnterCallee;
2188   ArrayRef<llvm::Value *> EnterArgs;
2189   llvm::FunctionCallee ExitCallee;
2190   ArrayRef<llvm::Value *> ExitArgs;
2191   bool Conditional;
2192   llvm::BasicBlock *ContBlock = nullptr;
2193 
2194 public:
2195   CommonActionTy(llvm::FunctionCallee EnterCallee,
2196                  ArrayRef<llvm::Value *> EnterArgs,
2197                  llvm::FunctionCallee ExitCallee,
2198                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2199       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2200         ExitArgs(ExitArgs), Conditional(Conditional) {}
2201   void Enter(CodeGenFunction &CGF) override {
2202     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2203     if (Conditional) {
2204       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2205       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2206       ContBlock = CGF.createBasicBlock("omp_if.end");
2207       // Generate the branch (If-stmt)
2208       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2209       CGF.EmitBlock(ThenBlock);
2210     }
2211   }
2212   void Done(CodeGenFunction &CGF) {
2213     // Emit the rest of blocks/branches
2214     CGF.EmitBranch(ContBlock);
2215     CGF.EmitBlock(ContBlock, true);
2216   }
2217   void Exit(CodeGenFunction &CGF) override {
2218     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2219   }
2220 };
2221 } // anonymous namespace
2222 
2223 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2224                                          StringRef CriticalName,
2225                                          const RegionCodeGenTy &CriticalOpGen,
2226                                          SourceLocation Loc, const Expr *Hint) {
2227   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2228   // CriticalOpGen();
2229   // __kmpc_end_critical(ident_t *, gtid, Lock);
2230   // Prepare arguments and build a call to __kmpc_critical
2231   if (!CGF.HaveInsertPoint())
2232     return;
2233   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2234                          getCriticalRegionLock(CriticalName)};
2235   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2236                                                 std::end(Args));
2237   if (Hint) {
2238     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2239         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2240   }
2241   CommonActionTy Action(
2242       OMPBuilder.getOrCreateRuntimeFunction(
2243           CGM.getModule(),
2244           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2245       EnterArgs,
2246       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2247                                             OMPRTL___kmpc_end_critical),
2248       Args);
2249   CriticalOpGen.setAction(Action);
2250   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2251 }
2252 
2253 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2254                                        const RegionCodeGenTy &MasterOpGen,
2255                                        SourceLocation Loc) {
2256   if (!CGF.HaveInsertPoint())
2257     return;
2258   // if(__kmpc_master(ident_t *, gtid)) {
2259   //   MasterOpGen();
2260   //   __kmpc_end_master(ident_t *, gtid);
2261   // }
2262   // Prepare arguments and build a call to __kmpc_master
2263   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2264   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2265                             CGM.getModule(), OMPRTL___kmpc_master),
2266                         Args,
2267                         OMPBuilder.getOrCreateRuntimeFunction(
2268                             CGM.getModule(), OMPRTL___kmpc_end_master),
2269                         Args,
2270                         /*Conditional=*/true);
2271   MasterOpGen.setAction(Action);
2272   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2273   Action.Done(CGF);
2274 }
2275 
2276 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2277                                         SourceLocation Loc) {
2278   if (!CGF.HaveInsertPoint())
2279     return;
2280   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2281     OMPBuilder.createTaskyield(CGF.Builder);
2282   } else {
2283     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2284     llvm::Value *Args[] = {
2285         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2286         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2287     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2289                         Args);
2290   }
2291 
2292   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2293     Region->emitUntiedSwitch(CGF);
2294 }
2295 
2296 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2297                                           const RegionCodeGenTy &TaskgroupOpGen,
2298                                           SourceLocation Loc) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // __kmpc_taskgroup(ident_t *, gtid);
2302   // TaskgroupOpGen();
2303   // __kmpc_end_taskgroup(ident_t *, gtid);
2304   // Prepare arguments and build a call to __kmpc_taskgroup
2305   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2306   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2307                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2308                         Args,
2309                         OMPBuilder.getOrCreateRuntimeFunction(
2310                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2311                         Args);
2312   TaskgroupOpGen.setAction(Action);
2313   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2314 }
2315 
2316 /// Given an array of pointers to variables, project the address of a
2317 /// given variable.
2318 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2319                                       unsigned Index, const VarDecl *Var) {
2320   // Pull out the pointer to the variable.
2321   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2322   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2323 
2324   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2325   Addr = CGF.Builder.CreateElementBitCast(
2326       Addr, CGF.ConvertTypeForMem(Var->getType()));
2327   return Addr;
2328 }
2329 
/// Emit the helper function passed to the runtime as the copyprivate copy
/// callback:
///
///   void copy_func(void *LHSArg, void *RHSArg);
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers to
/// variables. For every index I the generated function copies, through
/// CGF.EmitOMPCopy with AssignmentOps[I], from the variable addressed by
/// element I of the RHS array to the variable addressed by element I of the
/// LHS array.
///
/// \param CGM             Module the function is emitted into.
/// \param ArgsType        Pointer type the two void* arguments are cast to.
/// \param CopyprivateVars Copyprivate variable references; element I supplies
///                        the type used for copy I.
/// \param DestExprs       References to the destination variables.
/// \param SrcExprs        References to the source variables.
/// \param AssignmentOps   Assignment expressions, one per variable; its size
///                        determines how many copies are emitted.
/// \param Loc             Location used for the implicit parameters and the
///                        generated function.
/// \returns The emitted internal-linkage function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function name is built by the runtime's getName from these parts.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The body is emitted with its own CodeGenFunction instance.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination variable: element I of the LHS pointer array.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Source variable: element I of the RHS pointer array.
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy uses the type of the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2383 
/// Emit a 'single' region, including the copyprivate broadcast when
/// \p CopyprivateVars is non-empty.
///
/// The region body runs only when the __kmpc_single call returns non-null.
/// With copyprivate variables, a "did_it" flag records whether this thread
/// executed the body, and __kmpc_copyprivate is then called with a list of
/// the variables' addresses and a generated copy function.
///
/// \param CGF             Function being emitted into.
/// \param SingleOpGen     Generator for the region body.
/// \param Loc             Source location used for the runtime calls.
/// \param CopyprivateVars Variables named in copyprivate clauses.
/// \param SrcExprs        Per-variable expressions, same length as
///                        \p CopyprivateVars (see the review note at the
///                        emitCopyprivateCopyFunction call).
/// \param DstExprs        Per-variable expressions, same length as
///                        \p CopyprivateVars.
/// \param AssignmentOps   Per-variable assignment expressions.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // used below as the "copyprivate present" test.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // This store is emitted before Action.Done(), i.e. still inside the
    // conditional block, so only the executing thread sets the flag.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // The list is an array of void* with one element per copyprivate variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs and DstExprs are passed in the positions of the
    // helper's DestExprs and SrcExprs parameters respectively, i.e. swapped
    // relative to their names. Check how callers populate these arguments
    // before changing the order here.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // This Args shadows the outer (loc, gtid) array within this scope.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2471 
2472 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2473                                         const RegionCodeGenTy &OrderedOpGen,
2474                                         SourceLocation Loc, bool IsThreads) {
2475   if (!CGF.HaveInsertPoint())
2476     return;
2477   // __kmpc_ordered(ident_t *, gtid);
2478   // OrderedOpGen();
2479   // __kmpc_end_ordered(ident_t *, gtid);
2480   // Prepare arguments and build a call to __kmpc_ordered
2481   if (IsThreads) {
2482     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2483     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2484                               CGM.getModule(), OMPRTL___kmpc_ordered),
2485                           Args,
2486                           OMPBuilder.getOrCreateRuntimeFunction(
2487                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2488                           Args);
2489     OrderedOpGen.setAction(Action);
2490     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2491     return;
2492   }
2493   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2494 }
2495 
2496 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2497   unsigned Flags;
2498   if (Kind == OMPD_for)
2499     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2500   else if (Kind == OMPD_sections)
2501     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2502   else if (Kind == OMPD_single)
2503     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2504   else if (Kind == OMPD_barrier)
2505     Flags = OMP_IDENT_BARRIER_EXPL;
2506   else
2507     Flags = OMP_IDENT_BARRIER_IMPL;
2508   return Flags;
2509 }
2510 
2511 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2512     CodeGenFunction &CGF, const OMPLoopDirective &S,
2513     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2514   // Check if the loop directive is actually a doacross loop directive. In this
2515   // case choose static, 1 schedule.
2516   if (llvm::any_of(
2517           S.getClausesOfKind<OMPOrderedClause>(),
2518           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2519     ScheduleKind = OMPC_SCHEDULE_static;
2520     // Chunk size is 1 in this case.
2521     llvm::APInt ChunkSize(32, 1);
2522     ChunkExpr = IntegerLiteral::Create(
2523         CGF.getContext(), ChunkSize,
2524         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2525         SourceLocation());
2526   }
2527 }
2528 
2529 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2530                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2531                                       bool ForceSimpleCall) {
2532   // Check if we should use the OMPBuilder
2533   auto *OMPRegionInfo =
2534       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2535   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2536     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2537         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2538     return;
2539   }
2540 
2541   if (!CGF.HaveInsertPoint())
2542     return;
2543   // Build call __kmpc_cancel_barrier(loc, thread_id);
2544   // Build call __kmpc_barrier(loc, thread_id);
2545   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2546   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2547   // thread_id);
2548   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2549                          getThreadID(CGF, Loc)};
2550   if (OMPRegionInfo) {
2551     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2552       llvm::Value *Result = CGF.EmitRuntimeCall(
2553           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2554                                                 OMPRTL___kmpc_cancel_barrier),
2555           Args);
2556       if (EmitChecks) {
2557         // if (__kmpc_cancel_barrier()) {
2558         //   exit from construct;
2559         // }
2560         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2561         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2562         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2563         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2564         CGF.EmitBlock(ExitBB);
2565         //   exit from construct;
2566         CodeGenFunction::JumpDest CancelDestination =
2567             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2568         CGF.EmitBranchThroughCleanup(CancelDestination);
2569         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2570       }
2571       return;
2572     }
2573   }
2574   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2575                           CGM.getModule(), OMPRTL___kmpc_barrier),
2576                       Args);
2577 }
2578 
2579 /// Map the OpenMP loop schedule to the runtime enumeration.
2580 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2581                                           bool Chunked, bool Ordered) {
2582   switch (ScheduleKind) {
2583   case OMPC_SCHEDULE_static:
2584     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2585                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2586   case OMPC_SCHEDULE_dynamic:
2587     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2588   case OMPC_SCHEDULE_guided:
2589     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2590   case OMPC_SCHEDULE_runtime:
2591     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2592   case OMPC_SCHEDULE_auto:
2593     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2594   case OMPC_SCHEDULE_unknown:
2595     assert(!Chunked && "chunk was specified but schedule kind not known");
2596     return Ordered ? OMP_ord_static : OMP_sch_static;
2597   }
2598   llvm_unreachable("Unexpected runtime schedule");
2599 }
2600 
2601 /// Map the OpenMP distribute schedule to the runtime enumeration.
2602 static OpenMPSchedType
2603 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2604   // only static is allowed for dist_schedule
2605   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2606 }
2607 
2608 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2609                                          bool Chunked) const {
2610   OpenMPSchedType Schedule =
2611       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2612   return Schedule == OMP_sch_static;
2613 }
2614 
2615 bool CGOpenMPRuntime::isStaticNonchunked(
2616     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2617   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2618   return Schedule == OMP_dist_sch_static;
2619 }
2620 
2621 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2622                                       bool Chunked) const {
2623   OpenMPSchedType Schedule =
2624       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2625   return Schedule == OMP_sch_static_chunked;
2626 }
2627 
2628 bool CGOpenMPRuntime::isStaticChunked(
2629     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2630   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2631   return Schedule == OMP_dist_sch_static_chunked;
2632 }
2633 
2634 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2635   OpenMPSchedType Schedule =
2636       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2637   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2638   return Schedule != OMP_sch_static;
2639 }
2640 
2641 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2642                                   OpenMPScheduleClauseModifier M1,
2643                                   OpenMPScheduleClauseModifier M2) {
2644   int Modifier = 0;
2645   switch (M1) {
2646   case OMPC_SCHEDULE_MODIFIER_monotonic:
2647     Modifier = OMP_sch_modifier_monotonic;
2648     break;
2649   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2650     Modifier = OMP_sch_modifier_nonmonotonic;
2651     break;
2652   case OMPC_SCHEDULE_MODIFIER_simd:
2653     if (Schedule == OMP_sch_static_chunked)
2654       Schedule = OMP_sch_static_balanced_chunked;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_last:
2657   case OMPC_SCHEDULE_MODIFIER_unknown:
2658     break;
2659   }
2660   switch (M2) {
2661   case OMPC_SCHEDULE_MODIFIER_monotonic:
2662     Modifier = OMP_sch_modifier_monotonic;
2663     break;
2664   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2665     Modifier = OMP_sch_modifier_nonmonotonic;
2666     break;
2667   case OMPC_SCHEDULE_MODIFIER_simd:
2668     if (Schedule == OMP_sch_static_chunked)
2669       Schedule = OMP_sch_static_balanced_chunked;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_last:
2672   case OMPC_SCHEDULE_MODIFIER_unknown:
2673     break;
2674   }
2675   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2676   // If the static schedule kind is specified or if the ordered clause is
2677   // specified, and if the nonmonotonic modifier is not specified, the effect is
2678   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2679   // modifier is specified, the effect is as if the nonmonotonic modifier is
2680   // specified.
2681   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2682     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2683           Schedule == OMP_sch_static_balanced_chunked ||
2684           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2685           Schedule == OMP_dist_sch_static_chunked ||
2686           Schedule == OMP_dist_sch_static))
2687       Modifier = OMP_sch_modifier_nonmonotonic;
2688   }
2689   return Schedule | Modifier;
2690 }
2691 
2692 void CGOpenMPRuntime::emitForDispatchInit(
2693     CodeGenFunction &CGF, SourceLocation Loc,
2694     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2695     bool Ordered, const DispatchRTInput &DispatchValues) {
2696   if (!CGF.HaveInsertPoint())
2697     return;
2698   OpenMPSchedType Schedule = getRuntimeSchedule(
2699       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2700   assert(Ordered ||
2701          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2702           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2703           Schedule != OMP_sch_static_balanced_chunked));
2704   // Call __kmpc_dispatch_init(
2705   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2706   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2707   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2708 
2709   // If the Chunk was not specified in the clause - use default value 1.
2710   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2711                                             : CGF.Builder.getIntN(IVSize, 1);
2712   llvm::Value *Args[] = {
2713       emitUpdateLocation(CGF, Loc),
2714       getThreadID(CGF, Loc),
2715       CGF.Builder.getInt32(addMonoNonMonoModifier(
2716           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2717       DispatchValues.LB,                                     // Lower
2718       DispatchValues.UB,                                     // Upper
2719       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2720       Chunk                                                  // Chunk
2721   };
2722   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2723 }
2724 
2725 static void emitForStaticInitCall(
2726     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2727     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2728     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2729     const CGOpenMPRuntime::StaticRTInput &Values) {
2730   if (!CGF.HaveInsertPoint())
2731     return;
2732 
2733   assert(!Values.Ordered);
2734   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2735          Schedule == OMP_sch_static_balanced_chunked ||
2736          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2737          Schedule == OMP_dist_sch_static ||
2738          Schedule == OMP_dist_sch_static_chunked);
2739 
2740   // Call __kmpc_for_static_init(
2741   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2742   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2743   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2744   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2745   llvm::Value *Chunk = Values.Chunk;
2746   if (Chunk == nullptr) {
2747     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2748             Schedule == OMP_dist_sch_static) &&
2749            "expected static non-chunked schedule");
2750     // If the Chunk was not specified in the clause - use default value 1.
2751     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2752   } else {
2753     assert((Schedule == OMP_sch_static_chunked ||
2754             Schedule == OMP_sch_static_balanced_chunked ||
2755             Schedule == OMP_ord_static_chunked ||
2756             Schedule == OMP_dist_sch_static_chunked) &&
2757            "expected static chunked schedule");
2758   }
2759   llvm::Value *Args[] = {
2760       UpdateLocation,
2761       ThreadId,
2762       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2763                                                   M2)), // Schedule type
2764       Values.IL.getPointer(),                           // &isLastIter
2765       Values.LB.getPointer(),                           // &LB
2766       Values.UB.getPointer(),                           // &UB
2767       Values.ST.getPointer(),                           // &Stride
2768       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2769       Chunk                                             // Chunk
2770   };
2771   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2772 }
2773 
2774 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2775                                         SourceLocation Loc,
2776                                         OpenMPDirectiveKind DKind,
2777                                         const OpenMPScheduleTy &ScheduleKind,
2778                                         const StaticRTInput &Values) {
2779   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2780       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2781   assert(isOpenMPWorksharingDirective(DKind) &&
2782          "Expected loop-based or sections-based directive.");
2783   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2784                                              isOpenMPLoopDirective(DKind)
2785                                                  ? OMP_IDENT_WORK_LOOP
2786                                                  : OMP_IDENT_WORK_SECTIONS);
2787   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2788   llvm::FunctionCallee StaticInitFunction =
2789       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2790   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2791   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2792                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2793 }
2794 
2795 void CGOpenMPRuntime::emitDistributeStaticInit(
2796     CodeGenFunction &CGF, SourceLocation Loc,
2797     OpenMPDistScheduleClauseKind SchedKind,
2798     const CGOpenMPRuntime::StaticRTInput &Values) {
2799   OpenMPSchedType ScheduleNum =
2800       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2801   llvm::Value *UpdatedLocation =
2802       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2803   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2804   llvm::FunctionCallee StaticInitFunction =
2805       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2806   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2807                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2808                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2809 }
2810 
2811 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2812                                           SourceLocation Loc,
2813                                           OpenMPDirectiveKind DKind) {
2814   if (!CGF.HaveInsertPoint())
2815     return;
2816   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2817   llvm::Value *Args[] = {
2818       emitUpdateLocation(CGF, Loc,
2819                          isOpenMPDistributeDirective(DKind)
2820                              ? OMP_IDENT_WORK_DISTRIBUTE
2821                              : isOpenMPLoopDirective(DKind)
2822                                    ? OMP_IDENT_WORK_LOOP
2823                                    : OMP_IDENT_WORK_SECTIONS),
2824       getThreadID(CGF, Loc)};
2825   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2826   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2827                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2828                       Args);
2829 }
2830 
2831 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2832                                                  SourceLocation Loc,
2833                                                  unsigned IVSize,
2834                                                  bool IVSigned) {
2835   if (!CGF.HaveInsertPoint())
2836     return;
2837   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2838   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2839   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2840 }
2841 
2842 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2843                                           SourceLocation Loc, unsigned IVSize,
2844                                           bool IVSigned, Address IL,
2845                                           Address LB, Address UB,
2846                                           Address ST) {
2847   // Call __kmpc_dispatch_next(
2848   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2849   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2850   //          kmp_int[32|64] *p_stride);
2851   llvm::Value *Args[] = {
2852       emitUpdateLocation(CGF, Loc),
2853       getThreadID(CGF, Loc),
2854       IL.getPointer(), // &isLastIter
2855       LB.getPointer(), // &Lower
2856       UB.getPointer(), // &Upper
2857       ST.getPointer()  // &Stride
2858   };
2859   llvm::Value *Call =
2860       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2861   return CGF.EmitScalarConversion(
2862       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2863       CGF.getContext().BoolTy, Loc);
2864 }
2865 
2866 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2867                                            llvm::Value *NumThreads,
2868                                            SourceLocation Loc) {
2869   if (!CGF.HaveInsertPoint())
2870     return;
2871   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2872   llvm::Value *Args[] = {
2873       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2874       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2875   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2876                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2877                       Args);
2878 }
2879 
2880 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2881                                          ProcBindKind ProcBind,
2882                                          SourceLocation Loc) {
2883   if (!CGF.HaveInsertPoint())
2884     return;
2885   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2886   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2887   llvm::Value *Args[] = {
2888       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2889       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2890   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2891                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2892                       Args);
2893 }
2894 
2895 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2896                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2897   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2898     OMPBuilder.createFlush(CGF.Builder);
2899   } else {
2900     if (!CGF.HaveInsertPoint())
2901       return;
2902     // Build call void __kmpc_flush(ident_t *loc)
2903     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2904                             CGM.getModule(), OMPRTL___kmpc_flush),
2905                         emitUpdateLocation(CGF, Loc));
2906   }
2907 }
2908 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// No enumerator has an explicit value, so they number the fields
/// consecutively from 0 in the order listed here. Reordering or inserting
/// enumerators therefore changes every following index.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2934 
2935 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2936   return OffloadEntriesTargetRegion.empty() &&
2937          OffloadEntriesDeviceGlobalVar.empty();
2938 }
2939 
2940 /// Initialize target region entry.
2941 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2942     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2943                                     StringRef ParentName, unsigned LineNum,
2944                                     unsigned Order) {
2945   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2946                                              "only required for the device "
2947                                              "code generation.");
2948   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2949       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2950                                    OMPTargetRegionEntryTargetRegion);
2951   ++OffloadingEntriesNum;
2952 }
2953 
2954 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2955     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2956                                   StringRef ParentName, unsigned LineNum,
2957                                   llvm::Constant *Addr, llvm::Constant *ID,
2958                                   OMPTargetRegionEntryKind Flags) {
2959   // If we are emitting code for a target, the entry is already initialized,
2960   // only has to be registered.
2961   if (CGM.getLangOpts().OpenMPIsDevice) {
2962     // This could happen if the device compilation is invoked standalone.
2963     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2964       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2965                                       OffloadingEntriesNum);
2966     auto &Entry =
2967         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2968     Entry.setAddress(Addr);
2969     Entry.setID(ID);
2970     Entry.setFlags(Flags);
2971   } else {
2972     if (Flags ==
2973             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2974         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2975                                  /*IgnoreAddressId*/ true))
2976       return;
2977     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2978            "Target region entry already registered!");
2979     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2980     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2981     ++OffloadingEntriesNum;
2982   }
2983 }
2984 
2985 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2986     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2987     bool IgnoreAddressId) const {
2988   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2989   if (PerDevice == OffloadEntriesTargetRegion.end())
2990     return false;
2991   auto PerFile = PerDevice->second.find(FileID);
2992   if (PerFile == PerDevice->second.end())
2993     return false;
2994   auto PerParentName = PerFile->second.find(ParentName);
2995   if (PerParentName == PerFile->second.end())
2996     return false;
2997   auto PerLine = PerParentName->second.find(LineNum);
2998   if (PerLine == PerParentName->second.end())
2999     return false;
3000   // Fail if this entry is already registered.
3001   if (!IgnoreAddressId &&
3002       (PerLine->second.getAddress() || PerLine->second.getID()))
3003     return false;
3004   return true;
3005 }
3006 
3007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3008     const OffloadTargetRegionEntryInfoActTy &Action) {
3009   // Scan all target region entries and perform the provided action.
3010   for (const auto &D : OffloadEntriesTargetRegion)
3011     for (const auto &F : D.second)
3012       for (const auto &P : F.second)
3013         for (const auto &L : P.second)
3014           Action(D.first, F.first, P.first(), L.first, L.second);
3015 }
3016 
3017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3018     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3019                                        OMPTargetGlobalVarEntryKind Flags,
3020                                        unsigned Order) {
3021   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3022                                              "only required for the device "
3023                                              "code generation.");
3024   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3025   ++OffloadingEntriesNum;
3026 }
3027 
3028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3029     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3030                                      CharUnits VarSize,
3031                                      OMPTargetGlobalVarEntryKind Flags,
3032                                      llvm::GlobalValue::LinkageTypes Linkage) {
3033   if (CGM.getLangOpts().OpenMPIsDevice) {
3034     // This could happen if the device compilation is invoked standalone.
3035     if (!hasDeviceGlobalVarEntryInfo(VarName))
3036       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3037     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3038     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3039            "Resetting with the new address.");
3040     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3041       if (Entry.getVarSize().isZero()) {
3042         Entry.setVarSize(VarSize);
3043         Entry.setLinkage(Linkage);
3044       }
3045       return;
3046     }
3047     Entry.setVarSize(VarSize);
3048     Entry.setLinkage(Linkage);
3049     Entry.setAddress(Addr);
3050   } else {
3051     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3052       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3053       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3054              "Entry not initialized!");
3055       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3056              "Resetting with the new address.");
3057       if (Entry.getVarSize().isZero()) {
3058         Entry.setVarSize(VarSize);
3059         Entry.setLinkage(Linkage);
3060       }
3061       return;
3062     }
3063     OffloadEntriesDeviceGlobalVar.try_emplace(
3064         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3065     ++OffloadingEntriesNum;
3066   }
3067 }
3068 
3069 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3070     actOnDeviceGlobalVarEntriesInfo(
3071         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3072   // Scan all target region entries and perform the provided action.
3073   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3074     Action(E.getKey(), E.getValue());
3075 }
3076 
3077 void CGOpenMPRuntime::createOffloadEntry(
3078     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3079     llvm::GlobalValue::LinkageTypes Linkage) {
3080   StringRef Name = Addr->getName();
3081   llvm::Module &M = CGM.getModule();
3082   llvm::LLVMContext &C = M.getContext();
3083 
3084   // Create constant string with the name.
3085   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3086 
3087   std::string StringName = getName({"omp_offloading", "entry_name"});
3088   auto *Str = new llvm::GlobalVariable(
3089       M, StrPtrInit->getType(), /*isConstant=*/true,
3090       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3091   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3092 
3093   llvm::Constant *Data[] = {
3094       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3095       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3096       llvm::ConstantInt::get(CGM.SizeTy, Size),
3097       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3098       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3099   std::string EntryName = getName({"omp_offloading", "entry", ""});
3100   llvm::GlobalVariable *Entry = createGlobalStruct(
3101       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3102       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3103 
3104   // The entry has to be created in the section the linker expects it to be.
3105   Entry->setSection("omp_offloading_entries");
3106 }
3107 
/// Emits the "omp_offload.info" named metadata and the offload entries.
///
/// One metadata node is added per recorded target region entry and per
/// recorded device global variable entry. The entries are then visited in
/// creation order and an offload entry is created for each valid one;
/// invalid ones are diagnosed or skipped as described in the body.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by an entry's creation order (getOrder()).
  // OrderedEntries holds (entry, source location, name) per slot;
  // ParentFunctions holds the parent function name of target region entries.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with matching device/file IDs among the files known
        // to the source manager and translate (file, Line, column 1) into a
        // source location. Loc stays default-constructed if none matches.
        // NOTE: the loop-local iterator `E` shadows the lambda parameter `E`
        // inside the for statement only; after the loop `E` is the entry
        // again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variable entries are stored with an empty source location.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Visit the entries in creation order and create the offload entry for
  // each of them.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Note: `continue` inside the switch skips to the next ordered entry
      // without creating an offload entry; `break` falls through to the
      // createOffloadEntry call below.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // Expected state: no address on the device, an address on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // This diagnostic is reported without a source location or a
        // variable name.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address is used as both the ID and the
      // address of the entry.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3281 
3282 /// Loads all the offload entries information from the host IR
3283 /// metadata.
3284 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3285   // If we are in target mode, load the metadata from the host IR. This code has
3286   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3287 
3288   if (!CGM.getLangOpts().OpenMPIsDevice)
3289     return;
3290 
3291   if (CGM.getLangOpts().OMPHostIRFile.empty())
3292     return;
3293 
3294   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3295   if (auto EC = Buf.getError()) {
3296     CGM.getDiags().Report(diag::err_cannot_open_file)
3297         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3298     return;
3299   }
3300 
3301   llvm::LLVMContext C;
3302   auto ME = expectedToErrorOrAndEmitErrors(
3303       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3304 
3305   if (auto EC = ME.getError()) {
3306     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3307         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3308     CGM.getDiags().Report(DiagID)
3309         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3310     return;
3311   }
3312 
3313   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3314   if (!MD)
3315     return;
3316 
3317   for (llvm::MDNode *MN : MD->operands()) {
3318     auto &&GetMDInt = [MN](unsigned Idx) {
3319       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3320       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3321     };
3322 
3323     auto &&GetMDString = [MN](unsigned Idx) {
3324       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3325       return V->getString();
3326     };
3327 
3328     switch (GetMDInt(0)) {
3329     default:
3330       llvm_unreachable("Unexpected metadata!");
3331       break;
3332     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3333         OffloadingEntryInfoTargetRegion:
3334       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3335           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3336           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3337           /*Order=*/GetMDInt(5));
3338       break;
3339     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3340         OffloadingEntryInfoDeviceGlobalVar:
3341       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3342           /*MangledName=*/GetMDString(1),
3343           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3344               /*Flags=*/GetMDInt(2)),
3345           /*Order=*/GetMDInt(3));
3346       break;
3347     }
3348   }
3349 }
3350 
3351 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3352   if (!KmpRoutineEntryPtrTy) {
3353     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3354     ASTContext &C = CGM.getContext();
3355     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3356     FunctionProtoType::ExtProtoInfo EPI;
3357     KmpRoutineEntryPtrQTy = C.getPointerType(
3358         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3359     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3360   }
3361 }
3362 
3363 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3364   // Make sure the type of the entry is already created. This is the type we
3365   // have to create:
3366   // struct __tgt_offload_entry{
3367   //   void      *addr;       // Pointer to the offload entry info.
3368   //                          // (function or global)
3369   //   char      *name;       // Name of the function or global.
3370   //   size_t     size;       // Size of the entry info (0 if it a function).
3371   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3372   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3373   // };
3374   if (TgtOffloadEntryQTy.isNull()) {
3375     ASTContext &C = CGM.getContext();
3376     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3377     RD->startDefinition();
3378     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3379     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3380     addFieldToRecordDecl(C, RD, C.getSizeType());
3381     addFieldToRecordDecl(
3382         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3383     addFieldToRecordDecl(
3384         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3385     RD->completeDefinition();
3386     RD->addAttr(PackedAttr::CreateImplicit(C));
3387     TgtOffloadEntryQTy = C.getRecordType(RD);
3388   }
3389   return TgtOffloadEntryQTy;
3390 }
3391 
3392 namespace {
3393 struct PrivateHelpersTy {
3394   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3395                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3396       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3397         PrivateElemInit(PrivateElemInit) {}
3398   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3399   const Expr *OriginalRef = nullptr;
3400   const VarDecl *Original = nullptr;
3401   const VarDecl *PrivateCopy = nullptr;
3402   const VarDecl *PrivateElemInit = nullptr;
3403   bool isLocalPrivate() const {
3404     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3405   }
3406 };
3407 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3408 } // anonymous namespace
3409 
3410 static bool isAllocatableDecl(const VarDecl *VD) {
3411   const VarDecl *CVD = VD->getCanonicalDecl();
3412   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3413     return false;
3414   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3415   // Use the default allocation.
3416   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3417             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3418            !AA->getAllocator());
3419 }
3420 
3421 static RecordDecl *
3422 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3423   if (!Privates.empty()) {
3424     ASTContext &C = CGM.getContext();
3425     // Build struct .kmp_privates_t. {
3426     //         /*  private vars  */
3427     //       };
3428     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3429     RD->startDefinition();
3430     for (const auto &Pair : Privates) {
3431       const VarDecl *VD = Pair.second.Original;
3432       QualType Type = VD->getType().getNonReferenceType();
3433       // If the private variable is a local variable with lvalue ref type,
3434       // allocate the pointer instead of the pointee type.
3435       if (Pair.second.isLocalPrivate()) {
3436         if (VD->getType()->isLValueReferenceType())
3437           Type = C.getPointerType(Type);
3438         if (isAllocatableDecl(VD))
3439           Type = C.getPointerType(Type);
3440       }
3441       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3442       if (VD->hasAttrs()) {
3443         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3444              E(VD->getAttrs().end());
3445              I != E; ++I)
3446           FD->addAttr(*I);
3447       }
3448     }
3449     RD->completeDefinition();
3450     return RD;
3451   }
3452   return nullptr;
3453 }
3454 
3455 static RecordDecl *
3456 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3457                          QualType KmpInt32Ty,
3458                          QualType KmpRoutineEntryPointerQTy) {
3459   ASTContext &C = CGM.getContext();
3460   // Build struct kmp_task_t {
3461   //         void *              shareds;
3462   //         kmp_routine_entry_t routine;
3463   //         kmp_int32           part_id;
3464   //         kmp_cmplrdata_t data1;
3465   //         kmp_cmplrdata_t data2;
3466   // For taskloops additional fields:
3467   //         kmp_uint64          lb;
3468   //         kmp_uint64          ub;
3469   //         kmp_int64           st;
3470   //         kmp_int32           liter;
3471   //         void *              reductions;
3472   //       };
3473   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3474   UD->startDefinition();
3475   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3476   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3477   UD->completeDefinition();
3478   QualType KmpCmplrdataTy = C.getRecordType(UD);
3479   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3480   RD->startDefinition();
3481   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3482   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3483   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3484   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3485   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3486   if (isOpenMPTaskLoopDirective(Kind)) {
3487     QualType KmpUInt64Ty =
3488         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3489     QualType KmpInt64Ty =
3490         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3491     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3492     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3493     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3494     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3495     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3496   }
3497   RD->completeDefinition();
3498   return RD;
3499 }
3500 
3501 static RecordDecl *
3502 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3503                                      ArrayRef<PrivateDataTy> Privates) {
3504   ASTContext &C = CGM.getContext();
3505   // Build struct kmp_task_t_with_privates {
3506   //         kmp_task_t task_data;
3507   //         .kmp_privates_t. privates;
3508   //       };
3509   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3510   RD->startDefinition();
3511   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3512   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3513     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3514   RD->completeDefinition();
3515   return RD;
3516 }
3517 
3518 /// Emit a proxy function which accepts kmp_task_t as the second
3519 /// argument.
3520 /// \code
3521 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3522 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3523 ///   For taskloops:
3524 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3525 ///   tt->reductions, tt->shareds);
3526 ///   return 0;
3527 /// }
3528 /// \endcode
3529 static llvm::Function *
3530 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3531                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3532                       QualType KmpTaskTWithPrivatesPtrQTy,
3533                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3534                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3535                       llvm::Value *TaskPrivatesMap) {
3536   ASTContext &C = CGM.getContext();
3537   FunctionArgList Args;
3538   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3539                             ImplicitParamDecl::Other);
3540   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3541                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3542                                 ImplicitParamDecl::Other);
3543   Args.push_back(&GtidArg);
3544   Args.push_back(&TaskTypeArg);
3545   const auto &TaskEntryFnInfo =
3546       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3547   llvm::FunctionType *TaskEntryTy =
3548       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3549   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3550   auto *TaskEntry = llvm::Function::Create(
3551       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3552   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3553   TaskEntry->setDoesNotRecurse();
3554   CodeGenFunction CGF(CGM);
3555   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3556                     Loc, Loc);
3557 
3558   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3559   // tt,
3560   // For taskloops:
3561   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3562   // tt->task_data.shareds);
3563   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3564       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3565   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3566       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3567       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3568   const auto *KmpTaskTWithPrivatesQTyRD =
3569       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3570   LValue Base =
3571       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3572   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3573   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3574   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3575   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3576 
3577   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3578   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3579   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3580       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3581       CGF.ConvertTypeForMem(SharedsPtrTy));
3582 
3583   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3584   llvm::Value *PrivatesParam;
3585   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3586     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3587     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3588         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3589   } else {
3590     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3591   }
3592 
3593   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3594                                TaskPrivatesMap,
3595                                CGF.Builder
3596                                    .CreatePointerBitCastOrAddrSpaceCast(
3597                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3598                                    .getPointer()};
3599   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3600                                           std::end(CommonArgs));
3601   if (isOpenMPTaskLoopDirective(Kind)) {
3602     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3603     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3604     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3605     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3606     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3607     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3608     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3609     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3610     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3611     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3612     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3613     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3614     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3615     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3616     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3617     CallArgs.push_back(LBParam);
3618     CallArgs.push_back(UBParam);
3619     CallArgs.push_back(StParam);
3620     CallArgs.push_back(LIParam);
3621     CallArgs.push_back(RParam);
3622   }
3623   CallArgs.push_back(SharedsParam);
3624 
3625   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3626                                                   CallArgs);
3627   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3628                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3629   CGF.FinishFunction();
3630   return TaskEntry;
3631 }
3632 
3633 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3634                                             SourceLocation Loc,
3635                                             QualType KmpInt32Ty,
3636                                             QualType KmpTaskTWithPrivatesPtrQTy,
3637                                             QualType KmpTaskTWithPrivatesQTy) {
3638   ASTContext &C = CGM.getContext();
3639   FunctionArgList Args;
3640   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3641                             ImplicitParamDecl::Other);
3642   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3643                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3644                                 ImplicitParamDecl::Other);
3645   Args.push_back(&GtidArg);
3646   Args.push_back(&TaskTypeArg);
3647   const auto &DestructorFnInfo =
3648       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3649   llvm::FunctionType *DestructorFnTy =
3650       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3651   std::string Name =
3652       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3653   auto *DestructorFn =
3654       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3655                              Name, &CGM.getModule());
3656   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3657                                     DestructorFnInfo);
3658   DestructorFn->setDoesNotRecurse();
3659   CodeGenFunction CGF(CGM);
3660   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3661                     Args, Loc, Loc);
3662 
3663   LValue Base = CGF.EmitLoadOfPointerLValue(
3664       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3665       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3666   const auto *KmpTaskTWithPrivatesQTyRD =
3667       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3668   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3669   Base = CGF.EmitLValueForField(Base, *FI);
3670   for (const auto *Field :
3671        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3672     if (QualType::DestructionKind DtorKind =
3673             Field->getType().isDestructedType()) {
3674       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3675       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3676     }
3677   }
3678   CGF.FinishFunction();
3679   return DestructorFn;
3680 }
3681 
3682 /// Emit a privates mapping function for correct handling of private and
3683 /// firstprivate variables.
3684 /// \code
3685 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3686 /// **noalias priv1,...,  <tyn> **noalias privn) {
3687 ///   *priv1 = &.privates.priv1;
3688 ///   ...;
3689 ///   *privn = &.privates.privn;
3690 /// }
3691 /// \endcode
3692 static llvm::Value *
3693 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3694                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3695                                ArrayRef<PrivateDataTy> Privates) {
3696   ASTContext &C = CGM.getContext();
3697   FunctionArgList Args;
3698   ImplicitParamDecl TaskPrivatesArg(
3699       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3700       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3701       ImplicitParamDecl::Other);
3702   Args.push_back(&TaskPrivatesArg);
3703   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3704   unsigned Counter = 1;
3705   for (const Expr *E : Data.PrivateVars) {
3706     Args.push_back(ImplicitParamDecl::Create(
3707         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3708         C.getPointerType(C.getPointerType(E->getType()))
3709             .withConst()
3710             .withRestrict(),
3711         ImplicitParamDecl::Other));
3712     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3713     PrivateVarsPos[VD] = Counter;
3714     ++Counter;
3715   }
3716   for (const Expr *E : Data.FirstprivateVars) {
3717     Args.push_back(ImplicitParamDecl::Create(
3718         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3719         C.getPointerType(C.getPointerType(E->getType()))
3720             .withConst()
3721             .withRestrict(),
3722         ImplicitParamDecl::Other));
3723     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3724     PrivateVarsPos[VD] = Counter;
3725     ++Counter;
3726   }
3727   for (const Expr *E : Data.LastprivateVars) {
3728     Args.push_back(ImplicitParamDecl::Create(
3729         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730         C.getPointerType(C.getPointerType(E->getType()))
3731             .withConst()
3732             .withRestrict(),
3733         ImplicitParamDecl::Other));
3734     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3735     PrivateVarsPos[VD] = Counter;
3736     ++Counter;
3737   }
3738   for (const VarDecl *VD : Data.PrivateLocals) {
3739     QualType Ty = VD->getType().getNonReferenceType();
3740     if (VD->getType()->isLValueReferenceType())
3741       Ty = C.getPointerType(Ty);
3742     if (isAllocatableDecl(VD))
3743       Ty = C.getPointerType(Ty);
3744     Args.push_back(ImplicitParamDecl::Create(
3745         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3746         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3747         ImplicitParamDecl::Other));
3748     PrivateVarsPos[VD] = Counter;
3749     ++Counter;
3750   }
3751   const auto &TaskPrivatesMapFnInfo =
3752       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3753   llvm::FunctionType *TaskPrivatesMapTy =
3754       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3755   std::string Name =
3756       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3757   auto *TaskPrivatesMap = llvm::Function::Create(
3758       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3759       &CGM.getModule());
3760   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3761                                     TaskPrivatesMapFnInfo);
3762   if (CGM.getLangOpts().Optimize) {
3763     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3764     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3765     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3766   }
3767   CodeGenFunction CGF(CGM);
3768   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3769                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3770 
3771   // *privi = &.privates.privi;
3772   LValue Base = CGF.EmitLoadOfPointerLValue(
3773       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3774       TaskPrivatesArg.getType()->castAs<PointerType>());
3775   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3776   Counter = 0;
3777   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3778     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3779     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3780     LValue RefLVal =
3781         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3782     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3783         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3784     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3785     ++Counter;
3786   }
3787   CGF.FinishFunction();
3788   return TaskPrivatesMap;
3789 }
3790 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, addressed through \p TDBase)
/// and emits the initializer of each private copy into its field. Entries
/// that are local privates are skipped.
///
/// \param CGF Function into which the initialization code is emitted.
/// \param D Directive whose captured statement (taskloop or task) is used to
/// look up the captured fields of the original variables.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used when the
/// source lvalue base is built (see the condition in the body).
/// \param TDBase LValue of the kmp_task_t_with_privates object to initialize.
/// \param KmpTaskTWithPrivatesQTyRD Record of that object; its second field
/// must be the privates record.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type the shareds pointer is cast to.
/// \param Data Task data; only FirstprivateVars is consulted here.
/// \param Privates Descriptions of the private variables, in field order.
/// \param ForDup When true, only non-trivial constructor initializers are
/// emitted and firstprivate sources are read through the shareds block
/// (emitTaskDupFunction passes true).
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates field inside kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Any taskloop-based directive uses the 'taskloop' captured statement,
  // everything else the 'task' one.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // SrcBase stays default-constructed unless the condition below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself;
  // it is advanced once per entry of Privates, including skipped ones.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without ForDup every initializer is emitted; with ForDup only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A PrivateElemInit helper means the initializer reads from the
      // original (shared) variable, whose lvalue is determined first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the shareds block, then rebuild the
          // lvalue with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable, or code emitted for a block: emit the
          // reference expression as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the shared variable's
          // address and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No helper variable: the initializer does not need the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3912 
3913 /// Check if duplication function is required for taskloops.
3914 static bool checkInitIsRequired(CodeGenFunction &CGF,
3915                                 ArrayRef<PrivateDataTy> Privates) {
3916   bool InitRequired = false;
3917   for (const PrivateDataTy &Pair : Privates) {
3918     if (Pair.second.isLocalPrivate())
3919       continue;
3920     const VarDecl *VD = Pair.second.PrivateCopy;
3921     const Expr *Init = VD->getAnyInitializer();
3922     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3923                                     !CGF.isTrivialInitializer(Init));
3924     if (InitRequired)
3925       break;
3926   }
3927   return InitRequired;
3928 }
3929 
3930 
3931 /// Emit task_dup function (for initialization of
3932 /// private/firstprivate/lastprivate vars and last_iter flag)
3933 /// \code
3934 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3935 /// lastpriv) {
3936 /// // setup lastprivate flag
3937 ///    task_dst->last = lastpriv;
3938 /// // could be constructor calls here...
3939 /// }
3940 /// \endcode
3941 static llvm::Value *
3942 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3943                     const OMPExecutableDirective &D,
3944                     QualType KmpTaskTWithPrivatesPtrQTy,
3945                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3946                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3947                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3948                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3949   ASTContext &C = CGM.getContext();
3950   FunctionArgList Args;
3951   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3952                            KmpTaskTWithPrivatesPtrQTy,
3953                            ImplicitParamDecl::Other);
3954   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3955                            KmpTaskTWithPrivatesPtrQTy,
3956                            ImplicitParamDecl::Other);
3957   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3958                                 ImplicitParamDecl::Other);
3959   Args.push_back(&DstArg);
3960   Args.push_back(&SrcArg);
3961   Args.push_back(&LastprivArg);
3962   const auto &TaskDupFnInfo =
3963       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3964   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3965   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3966   auto *TaskDup = llvm::Function::Create(
3967       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3968   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3969   TaskDup->setDoesNotRecurse();
3970   CodeGenFunction CGF(CGM);
3971   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3972                     Loc);
3973 
3974   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3975       CGF.GetAddrOfLocalVar(&DstArg),
3976       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3977   // task_dst->liter = lastpriv;
3978   if (WithLastIter) {
3979     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3980     LValue Base = CGF.EmitLValueForField(
3981         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3982     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3983     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3984         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3985     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3986   }
3987 
3988   // Emit initial values for private copies (if any).
3989   assert(!Privates.empty());
3990   Address KmpTaskSharedsPtr = Address::invalid();
3991   if (!Data.FirstprivateVars.empty()) {
3992     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3993         CGF.GetAddrOfLocalVar(&SrcArg),
3994         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3995     LValue Base = CGF.EmitLValueForField(
3996         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3997     KmpTaskSharedsPtr = Address(
3998         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3999                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4000                                                   KmpTaskTShareds)),
4001                              Loc),
4002         CGM.getNaturalTypeAlignment(SharedsTy));
4003   }
4004   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4005                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4006   CGF.FinishFunction();
4007   return TaskDup;
4008 }
4009 
4010 /// Checks if destructor function is required to be generated.
4011 /// \return true if cleanups are required, false otherwise.
4012 static bool
4013 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4014                          ArrayRef<PrivateDataTy> Privates) {
4015   for (const PrivateDataTy &P : Privates) {
4016     if (P.second.isLocalPrivate())
4017       continue;
4018     QualType Ty = P.second.Original->getType().getNonReferenceType();
4019     if (Ty.isDestructedType())
4020       return true;
4021   }
4022   return false;
4023 }
4024 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of \c E (outermost first) and
/// leaves the insertion point inside the innermost "iter.body" block; the
/// destructor closes the loops in reverse order. Code emitted while an object
/// of this class is alive therefore ends up in the body of the loop nest.
/// When \c E is null both the constructor and the destructor do nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations, indexed like the iterators of E:
  // ContDests[I] is the "iter.cont" (condition) block of loop I and
  // ExitDests[I] is its "iter.exit" block.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for every iterator in \p E.
  /// \param CGF Function into which the loops are emitted.
  /// \param E Iterator expression; may be null, in which case nothing is
  /// emitted.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate every upper bound and register private storage
    // for each iterator variable and its helper counter. The bounds are
    // emitted here, before Privatize() and before any loop block exists.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: emit the header of each loop, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the loop latches and exit blocks, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // I runs from numOfIterators() down to 1 so that index I - 1 walks the
    // loops in the reverse of the order in which they were opened.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the outermost loop's exit block (I == 1) is marked as finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4103 
4104 static std::pair<llvm::Value *, llvm::Value *>
4105 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4106   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4107   llvm::Value *Addr;
4108   if (OASE) {
4109     const Expr *Base = OASE->getBase();
4110     Addr = CGF.EmitScalarExpr(Base);
4111   } else {
4112     Addr = CGF.EmitLValue(E).getPointer(CGF);
4113   }
4114   llvm::Value *SizeVal;
4115   QualType Ty = E->getType();
4116   if (OASE) {
4117     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4118     for (const Expr *SE : OASE->getDimensions()) {
4119       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4120       Sz = CGF.EmitScalarConversion(
4121           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4122       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4123     }
4124   } else if (const auto *ASE =
4125                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4126     LValue UpAddrLVal =
4127         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4128     llvm::Value *UpAddr =
4129         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4130     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4131     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4132     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4133   } else {
4134     SizeVal = CGF.getTypeSize(Ty);
4135   }
4136   return std::make_pair(Addr, SizeVal);
4137 }
4138 
4139 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4140 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4141   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4142   if (KmpTaskAffinityInfoTy.isNull()) {
4143     RecordDecl *KmpAffinityInfoRD =
4144         C.buildImplicitRecord("kmp_task_affinity_info_t");
4145     KmpAffinityInfoRD->startDefinition();
4146     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4147     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4148     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4149     KmpAffinityInfoRD->completeDefinition();
4150     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4151   }
4152 }
4153 
/// Emits the code that allocates and initializes the runtime task descriptor
/// for directive \p D.
///
/// In order, the function:
///  - collects all private, firstprivate, lastprivate and local private
///    variables and orders them by decreasing alignment;
///  - builds (or reuses) the kmp_task_t record type and a per-task record
///    that wraps it together with the privates;
///  - emits the privates mapping function and the proxy task entry function;
///  - computes the task flags and calls the runtime task allocation entry;
///  - handles 'detach' and 'affinity' clauses;
///  - copies the shareds, initializes the privates and, where needed, emits
///    the task duplication and destructor functions and stores the priority.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the emitted runtime calls and loads.
/// \param D Directive for which the task is created.
/// \param TaskFunction Outlined task function; the type of its fourth
/// argument is used as the type of the privates mapping function pointer.
/// \param SharedsTy Type of the record holding the shared variables.
/// \param Shareds Address of the record holding the shared variables.
/// \param Data Task-related data collected from the directive's clauses.
/// \return The allocated task, the task entry function, the task pointer
/// cast to the per-task record type, the lvalue of the embedded kmp_task_t,
/// its record declaration and, if emitted, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each *Vars list is walked in lock-step with its matching *Copies list
  // (and, for firstprivates, with the list of element initializers).
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable local privates are recorded with pointer alignment instead
  // of the alignment of the declaration itself.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the collection order of
  // entries that have equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other task-generating directives use
  // separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function pointer must have the type of the outlined task
  // function's fourth argument.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record is the one whose type is
    // handed to the mapping function.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags holds the bits that are known at compile time.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final bit is selected at run time when Data.Final carries a value,
  // and folded to a constant from its integer part otherwise.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target variant of the allocation entry is
  // called; it takes the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this Loc is the emitted location value for the clause and shadows
    // the SourceLocation parameter inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler variable
    // before storing it.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements is the run-time part (contributed by clauses that have an
    // iterator modifier); NumAffinities is the compile-time part.
    // NOTE(review): the run-time part is the product of the upper bounds of
    // every iterator of every such clause and is not scaled by the clause's
    // list size - confirm this matches the number of entries written by the
    // fill loops below.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // At least one clause has an iterator: the total is only known at run
      // time, so the array is emitted as a variable-length array whose size
      // expression is an opaque value bound to the computed total.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The count is passed to the runtime call below as a 32-bit value.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // No iterators: the element count is a compile-time constant, so a
      // constant-size temporary is used.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Pos is the compile-time index of the next free array slot.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops need a run-time index; it is
    // kept in a temporary that starts at the number of slots filled above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Everything emitted while IteratorScope is alive is placed inside the
      // loop nest it generates; the loops are closed when it goes out of
      // scope at the end of this iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the run-time index to the next slot.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; its first field is the
  // embedded kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the pointer loaded from the shareds field of the
    // new task's kmp_task_t.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The priority value goes into the Data2 field, using the union
    // declaration obtained from the Data1 field's type above.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4543 
namespace {
/// Dependence kinds in the encoding expected by the runtime library.
enum RTLDependenceKindTy {
  DepIn = 1,
  DepInOut = 3,
  DepMutexInOutSet = 4,
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4554 
4555 /// Translates internal dependency kind into the runtime kind.
4556 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4557   RTLDependenceKindTy DepKind;
4558   switch (K) {
4559   case OMPC_DEPEND_in:
4560     DepKind = DepIn;
4561     break;
4562   // Out and InOut dependencies must use the same code.
4563   case OMPC_DEPEND_out:
4564   case OMPC_DEPEND_inout:
4565     DepKind = DepInOut;
4566     break;
4567   case OMPC_DEPEND_mutexinoutset:
4568     DepKind = DepMutexInOutSet;
4569     break;
4570   case OMPC_DEPEND_source:
4571   case OMPC_DEPEND_sink:
4572   case OMPC_DEPEND_depobj:
4573   case OMPC_DEPEND_unknown:
4574     llvm_unreachable("Unknown task dependence type");
4575   }
4576   return DepKind;
4577 }
4578 
4579 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4580 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4581                            QualType &FlagsTy) {
4582   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4583   if (KmpDependInfoTy.isNull()) {
4584     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4585     KmpDependInfoRD->startDefinition();
4586     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4587     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4588     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4589     KmpDependInfoRD->completeDefinition();
4590     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4591   }
4592 }
4593 
4594 std::pair<llvm::Value *, LValue>
4595 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4596                                    SourceLocation Loc) {
4597   ASTContext &C = CGM.getContext();
4598   QualType FlagsTy;
4599   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4600   RecordDecl *KmpDependInfoRD =
4601       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4602   LValue Base = CGF.EmitLoadOfPointerLValue(
4603       DepobjLVal.getAddress(CGF),
4604       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4605   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4606   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4607           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4608   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4609                             Base.getTBAAInfo());
4610   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4611       Addr.getPointer(),
4612       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4613   LValue NumDepsBase = CGF.MakeAddrLValue(
4614       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4615       Base.getBaseInfo(), Base.getTBAAInfo());
4616   // NumDeps = deps[i].base_addr;
4617   LValue BaseAddrLVal = CGF.EmitLValueForField(
4618       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4619   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4620   return std::make_pair(NumDeps, Base);
4621 }
4622 
4623 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4624                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4625                            const OMPTaskDataTy::DependData &Data,
4626                            Address DependenciesArray) {
4627   CodeGenModule &CGM = CGF.CGM;
4628   ASTContext &C = CGM.getContext();
4629   QualType FlagsTy;
4630   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4631   RecordDecl *KmpDependInfoRD =
4632       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4633   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4634 
4635   OMPIteratorGeneratorScope IteratorScope(
4636       CGF, cast_or_null<OMPIteratorExpr>(
4637                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4638                                  : nullptr));
4639   for (const Expr *E : Data.DepExprs) {
4640     llvm::Value *Addr;
4641     llvm::Value *Size;
4642     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4643     LValue Base;
4644     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4645       Base = CGF.MakeAddrLValue(
4646           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4647     } else {
4648       LValue &PosLVal = *Pos.get<LValue *>();
4649       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4650       Base = CGF.MakeAddrLValue(
4651           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4652                   DependenciesArray.getAlignment()),
4653           KmpDependInfoTy);
4654     }
4655     // deps[i].base_addr = &<Dependencies[i].second>;
4656     LValue BaseAddrLVal = CGF.EmitLValueForField(
4657         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4658     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4659                           BaseAddrLVal);
4660     // deps[i].len = sizeof(<Dependencies[i].second>);
4661     LValue LenLVal = CGF.EmitLValueForField(
4662         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4663     CGF.EmitStoreOfScalar(Size, LenLVal);
4664     // deps[i].flags = <Dependencies[i].first>;
4665     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4666     LValue FlagsLVal = CGF.EmitLValueForField(
4667         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4668     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4669                           FlagsLVal);
4670     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4671       ++(*P);
4672     } else {
4673       LValue &PosLVal = *Pos.get<LValue *>();
4674       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4675       Idx = CGF.Builder.CreateNUWAdd(Idx,
4676                                      llvm::ConstantInt::get(Idx->getType(), 1));
4677       CGF.EmitStoreOfScalar(Idx, PosLVal);
4678     }
4679   }
4680 }
4681 
4682 static SmallVector<llvm::Value *, 4>
4683 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4684                         const OMPTaskDataTy::DependData &Data) {
4685   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4686          "Expected depobj dependecy kind.");
4687   SmallVector<llvm::Value *, 4> Sizes;
4688   SmallVector<LValue, 4> SizeLVals;
4689   ASTContext &C = CGF.getContext();
4690   QualType FlagsTy;
4691   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4692   RecordDecl *KmpDependInfoRD =
4693       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4694   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4695   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4696   {
4697     OMPIteratorGeneratorScope IteratorScope(
4698         CGF, cast_or_null<OMPIteratorExpr>(
4699                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4700                                    : nullptr));
4701     for (const Expr *E : Data.DepExprs) {
4702       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4703       LValue Base = CGF.EmitLoadOfPointerLValue(
4704           DepobjLVal.getAddress(CGF),
4705           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4706       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4707           Base.getAddress(CGF), KmpDependInfoPtrT);
4708       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4709                                 Base.getTBAAInfo());
4710       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4711           Addr.getPointer(),
4712           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4713       LValue NumDepsBase = CGF.MakeAddrLValue(
4714           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4715           Base.getBaseInfo(), Base.getTBAAInfo());
4716       // NumDeps = deps[i].base_addr;
4717       LValue BaseAddrLVal = CGF.EmitLValueForField(
4718           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4719       llvm::Value *NumDeps =
4720           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4721       LValue NumLVal = CGF.MakeAddrLValue(
4722           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4723           C.getUIntPtrType());
4724       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4725                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4726       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4727       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4728       CGF.EmitStoreOfScalar(Add, NumLVal);
4729       SizeLVals.push_back(NumLVal);
4730     }
4731   }
4732   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4733     llvm::Value *Size =
4734         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4735     Sizes.push_back(Size);
4736   }
4737   return Sizes;
4738 }
4739 
4740 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4741                                LValue PosLVal,
4742                                const OMPTaskDataTy::DependData &Data,
4743                                Address DependenciesArray) {
4744   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4745          "Expected depobj dependecy kind.");
4746   ASTContext &C = CGF.getContext();
4747   QualType FlagsTy;
4748   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4749   RecordDecl *KmpDependInfoRD =
4750       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4751   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4752   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4753   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4754   {
4755     OMPIteratorGeneratorScope IteratorScope(
4756         CGF, cast_or_null<OMPIteratorExpr>(
4757                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4758                                    : nullptr));
4759     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4760       const Expr *E = Data.DepExprs[I];
4761       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4762       LValue Base = CGF.EmitLoadOfPointerLValue(
4763           DepobjLVal.getAddress(CGF),
4764           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4765       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4766           Base.getAddress(CGF), KmpDependInfoPtrT);
4767       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4768                                 Base.getTBAAInfo());
4769 
4770       // Get number of elements in a single depobj.
4771       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4772           Addr.getPointer(),
4773           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4774       LValue NumDepsBase = CGF.MakeAddrLValue(
4775           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4776           Base.getBaseInfo(), Base.getTBAAInfo());
4777       // NumDeps = deps[i].base_addr;
4778       LValue BaseAddrLVal = CGF.EmitLValueForField(
4779           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4780       llvm::Value *NumDeps =
4781           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4782 
4783       // memcopy dependency data.
4784       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4785           ElSize,
4786           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4787       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4788       Address DepAddr =
4789           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4790                   DependenciesArray.getAlignment());
4791       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4792 
4793       // Increase pos.
4794       // pos += size;
4795       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4796       CGF.EmitStoreOfScalar(Add, PosLVal);
4797     }
4798   }
4799 }
4800 
4801 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4802     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4803     SourceLocation Loc) {
4804   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4805         return D.DepExprs.empty();
4806       }))
4807     return std::make_pair(nullptr, Address::invalid());
4808   // Process list of dependencies.
4809   ASTContext &C = CGM.getContext();
4810   Address DependenciesArray = Address::invalid();
4811   llvm::Value *NumOfElements = nullptr;
4812   unsigned NumDependencies = std::accumulate(
4813       Dependencies.begin(), Dependencies.end(), 0,
4814       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4815         return D.DepKind == OMPC_DEPEND_depobj
4816                    ? V
4817                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4818       });
4819   QualType FlagsTy;
4820   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4821   bool HasDepobjDeps = false;
4822   bool HasRegularWithIterators = false;
4823   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4824   llvm::Value *NumOfRegularWithIterators =
4825       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4826   // Calculate number of depobj dependecies and regular deps with the iterators.
4827   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4828     if (D.DepKind == OMPC_DEPEND_depobj) {
4829       SmallVector<llvm::Value *, 4> Sizes =
4830           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4831       for (llvm::Value *Size : Sizes) {
4832         NumOfDepobjElements =
4833             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4834       }
4835       HasDepobjDeps = true;
4836       continue;
4837     }
4838     // Include number of iterations, if any.
4839     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4840       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4841         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4842         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4843         NumOfRegularWithIterators =
4844             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4845       }
4846       HasRegularWithIterators = true;
4847       continue;
4848     }
4849   }
4850 
4851   QualType KmpDependInfoArrayTy;
4852   if (HasDepobjDeps || HasRegularWithIterators) {
4853     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4854                                            /*isSigned=*/false);
4855     if (HasDepobjDeps) {
4856       NumOfElements =
4857           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4858     }
4859     if (HasRegularWithIterators) {
4860       NumOfElements =
4861           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4862     }
4863     OpaqueValueExpr OVE(Loc,
4864                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4865                         VK_RValue);
4866     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4867                                                   RValue::get(NumOfElements));
4868     KmpDependInfoArrayTy =
4869         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4870                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4871     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4872     // Properly emit variable-sized array.
4873     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4874                                          ImplicitParamDecl::Other);
4875     CGF.EmitVarDecl(*PD);
4876     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4877     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4878                                               /*isSigned=*/false);
4879   } else {
4880     KmpDependInfoArrayTy = C.getConstantArrayType(
4881         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4882         ArrayType::Normal, /*IndexTypeQuals=*/0);
4883     DependenciesArray =
4884         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4885     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4886     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4887                                            /*isSigned=*/false);
4888   }
4889   unsigned Pos = 0;
4890   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4891     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4892         Dependencies[I].IteratorExpr)
4893       continue;
4894     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4895                    DependenciesArray);
4896   }
4897   // Copy regular dependecies with iterators.
4898   LValue PosLVal = CGF.MakeAddrLValue(
4899       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4900   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4901   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4902     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4903         !Dependencies[I].IteratorExpr)
4904       continue;
4905     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4906                    DependenciesArray);
4907   }
4908   // Copy final depobj arrays without iterators.
4909   if (HasDepobjDeps) {
4910     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4911       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4912         continue;
4913       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4914                          DependenciesArray);
4915     }
4916   }
4917   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4918       DependenciesArray, CGF.VoidPtrTy);
4919   return std::make_pair(NumOfElements, DependenciesArray);
4920 }
4921 
4922 Address CGOpenMPRuntime::emitDepobjDependClause(
4923     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4924     SourceLocation Loc) {
4925   if (Dependencies.DepExprs.empty())
4926     return Address::invalid();
4927   // Process list of dependencies.
4928   ASTContext &C = CGM.getContext();
4929   Address DependenciesArray = Address::invalid();
4930   unsigned NumDependencies = Dependencies.DepExprs.size();
4931   QualType FlagsTy;
4932   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4933   RecordDecl *KmpDependInfoRD =
4934       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4935 
4936   llvm::Value *Size;
4937   // Define type kmp_depend_info[<Dependencies.size()>];
4938   // For depobj reserve one extra element to store the number of elements.
4939   // It is required to handle depobj(x) update(in) construct.
4940   // kmp_depend_info[<Dependencies.size()>] deps;
4941   llvm::Value *NumDepsVal;
4942   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4943   if (const auto *IE =
4944           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4945     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4946     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4947       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4948       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4949       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4950     }
4951     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4952                                     NumDepsVal);
4953     CharUnits SizeInBytes =
4954         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4955     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4956     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4957     NumDepsVal =
4958         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4959   } else {
4960     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4961         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4962         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4963     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4964     Size = CGM.getSize(Sz.alignTo(Align));
4965     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4966   }
4967   // Need to allocate on the dynamic memory.
4968   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4969   // Use default allocator.
4970   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4971   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4972 
4973   llvm::Value *Addr =
4974       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4975                               CGM.getModule(), OMPRTL___kmpc_alloc),
4976                           Args, ".dep.arr.addr");
4977   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4978       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4979   DependenciesArray = Address(Addr, Align);
4980   // Write number of elements in the first element of array for depobj.
4981   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4982   // deps[i].base_addr = NumDependencies;
4983   LValue BaseAddrLVal = CGF.EmitLValueForField(
4984       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4985   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4986   llvm::PointerUnion<unsigned *, LValue *> Pos;
4987   unsigned Idx = 1;
4988   LValue PosLVal;
4989   if (Dependencies.IteratorExpr) {
4990     PosLVal = CGF.MakeAddrLValue(
4991         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4992         C.getSizeType());
4993     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4994                           /*IsInit=*/true);
4995     Pos = &PosLVal;
4996   } else {
4997     Pos = &Idx;
4998   }
4999   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5000   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5001       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5002   return DependenciesArray;
5003 }
5004 
5005 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5006                                         SourceLocation Loc) {
5007   ASTContext &C = CGM.getContext();
5008   QualType FlagsTy;
5009   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5010   LValue Base = CGF.EmitLoadOfPointerLValue(
5011       DepobjLVal.getAddress(CGF),
5012       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5013   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5014   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5015       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5016   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5017       Addr.getPointer(),
5018       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5019   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5020                                                                CGF.VoidPtrTy);
5021   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5022   // Use default allocator.
5023   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5024   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5025 
5026   // _kmpc_free(gtid, addr, nullptr);
5027   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5028                                 CGM.getModule(), OMPRTL___kmpc_free),
5029                             Args);
5030 }
5031 
5032 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5033                                        OpenMPDependClauseKind NewDepKind,
5034                                        SourceLocation Loc) {
5035   ASTContext &C = CGM.getContext();
5036   QualType FlagsTy;
5037   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5038   RecordDecl *KmpDependInfoRD =
5039       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5040   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5041   llvm::Value *NumDeps;
5042   LValue Base;
5043   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5044 
5045   Address Begin = Base.getAddress(CGF);
5046   // Cast from pointer to array type to pointer to single element.
5047   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5048   // The basic structure here is a while-do loop.
5049   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5050   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5051   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5052   CGF.EmitBlock(BodyBB);
5053   llvm::PHINode *ElementPHI =
5054       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5055   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5056   Begin = Address(ElementPHI, Begin.getAlignment());
5057   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5058                             Base.getTBAAInfo());
5059   // deps[i].flags = NewDepKind;
5060   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5061   LValue FlagsLVal = CGF.EmitLValueForField(
5062       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5063   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5064                         FlagsLVal);
5065 
5066   // Shift the address forward by one element.
5067   Address ElementNext =
5068       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5069   ElementPHI->addIncoming(ElementNext.getPointer(),
5070                           CGF.Builder.GetInsertBlock());
5071   llvm::Value *IsEmpty =
5072       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5073   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5074   // Done.
5075   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5076 }
5077 
5078 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5079                                    const OMPExecutableDirective &D,
5080                                    llvm::Function *TaskFunction,
5081                                    QualType SharedsTy, Address Shareds,
5082                                    const Expr *IfCond,
5083                                    const OMPTaskDataTy &Data) {
5084   if (!CGF.HaveInsertPoint())
5085     return;
5086 
5087   TaskResultTy Result =
5088       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5089   llvm::Value *NewTask = Result.NewTask;
5090   llvm::Function *TaskEntry = Result.TaskEntry;
5091   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5092   LValue TDBase = Result.TDBase;
5093   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5094   // Process list of dependences.
5095   Address DependenciesArray = Address::invalid();
5096   llvm::Value *NumOfElements;
5097   std::tie(NumOfElements, DependenciesArray) =
5098       emitDependClause(CGF, Data.Dependences, Loc);
5099 
5100   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5101   // libcall.
5102   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5103   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5104   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5105   // list is not empty
5106   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5107   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5108   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5109   llvm::Value *DepTaskArgs[7];
5110   if (!Data.Dependences.empty()) {
5111     DepTaskArgs[0] = UpLoc;
5112     DepTaskArgs[1] = ThreadID;
5113     DepTaskArgs[2] = NewTask;
5114     DepTaskArgs[3] = NumOfElements;
5115     DepTaskArgs[4] = DependenciesArray.getPointer();
5116     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5117     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5118   }
5119   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5120                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5121     if (!Data.Tied) {
5122       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5123       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5124       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5125     }
5126     if (!Data.Dependences.empty()) {
5127       CGF.EmitRuntimeCall(
5128           OMPBuilder.getOrCreateRuntimeFunction(
5129               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5130           DepTaskArgs);
5131     } else {
5132       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5133                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5134                           TaskArgs);
5135     }
5136     // Check if parent region is untied and build return for untied task;
5137     if (auto *Region =
5138             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5139       Region->emitUntiedSwitch(CGF);
5140   };
5141 
5142   llvm::Value *DepWaitTaskArgs[6];
5143   if (!Data.Dependences.empty()) {
5144     DepWaitTaskArgs[0] = UpLoc;
5145     DepWaitTaskArgs[1] = ThreadID;
5146     DepWaitTaskArgs[2] = NumOfElements;
5147     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5148     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5149     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5150   }
5151   auto &M = CGM.getModule();
5152   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5153                         TaskEntry, &Data, &DepWaitTaskArgs,
5154                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5155     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5156     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5157     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5158     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5159     // is specified.
5160     if (!Data.Dependences.empty())
5161       CGF.EmitRuntimeCall(
5162           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5163           DepWaitTaskArgs);
5164     // Call proxy_task_entry(gtid, new_task);
5165     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5166                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5167       Action.Enter(CGF);
5168       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5169       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5170                                                           OutlinedFnArgs);
5171     };
5172 
5173     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5174     // kmp_task_t *new_task);
5175     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5176     // kmp_task_t *new_task);
5177     RegionCodeGenTy RCG(CodeGen);
5178     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5179                               M, OMPRTL___kmpc_omp_task_begin_if0),
5180                           TaskArgs,
5181                           OMPBuilder.getOrCreateRuntimeFunction(
5182                               M, OMPRTL___kmpc_omp_task_complete_if0),
5183                           TaskArgs);
5184     RCG.setAction(Action);
5185     RCG(CGF);
5186   };
5187 
5188   if (IfCond) {
5189     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5190   } else {
5191     RegionCodeGenTy ThenRCG(ThenCodeGen);
5192     ThenRCG(CGF);
5193   }
5194 }
5195 
5196 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5197                                        const OMPLoopDirective &D,
5198                                        llvm::Function *TaskFunction,
5199                                        QualType SharedsTy, Address Shareds,
5200                                        const Expr *IfCond,
5201                                        const OMPTaskDataTy &Data) {
5202   if (!CGF.HaveInsertPoint())
5203     return;
5204   TaskResultTy Result =
5205       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5206   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5207   // libcall.
5208   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5209   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5210   // sched, kmp_uint64 grainsize, void *task_dup);
5211   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5212   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5213   llvm::Value *IfVal;
5214   if (IfCond) {
5215     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5216                                       /*isSigned=*/true);
5217   } else {
5218     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5219   }
5220 
5221   LValue LBLVal = CGF.EmitLValueForField(
5222       Result.TDBase,
5223       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5224   const auto *LBVar =
5225       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5226   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5227                        LBLVal.getQuals(),
5228                        /*IsInitializer=*/true);
5229   LValue UBLVal = CGF.EmitLValueForField(
5230       Result.TDBase,
5231       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5232   const auto *UBVar =
5233       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5234   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5235                        UBLVal.getQuals(),
5236                        /*IsInitializer=*/true);
5237   LValue StLVal = CGF.EmitLValueForField(
5238       Result.TDBase,
5239       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5240   const auto *StVar =
5241       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5242   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5243                        StLVal.getQuals(),
5244                        /*IsInitializer=*/true);
5245   // Store reductions address.
5246   LValue RedLVal = CGF.EmitLValueForField(
5247       Result.TDBase,
5248       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5249   if (Data.Reductions) {
5250     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5251   } else {
5252     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5253                                CGF.getContext().VoidPtrTy);
5254   }
5255   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5256   llvm::Value *TaskArgs[] = {
5257       UpLoc,
5258       ThreadID,
5259       Result.NewTask,
5260       IfVal,
5261       LBLVal.getPointer(CGF),
5262       UBLVal.getPointer(CGF),
5263       CGF.EmitLoadOfScalar(StLVal, Loc),
5264       llvm::ConstantInt::getSigned(
5265           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5266       llvm::ConstantInt::getSigned(
5267           CGF.IntTy, Data.Schedule.getPointer()
5268                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5269                          : NoSchedule),
5270       Data.Schedule.getPointer()
5271           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5272                                       /*isSigned=*/false)
5273           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5274       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5275                              Result.TaskDupFn, CGF.VoidPtrTy)
5276                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5277   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5278                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5279                       TaskArgs);
5280 }
5281 
5282 /// Emit reduction operation for each element of array (required for
5283 /// array sections) LHS op = RHS.
5284 /// \param Type Type of array.
5285 /// \param LHSVar Variable on the left side of the reduction operation
5286 /// (references element of array in original variable).
5287 /// \param RHSVar Variable on the right side of the reduction operation
5288 /// (references element of array in original variable).
5289 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5290 /// RHSVar.
5291 static void EmitOMPAggregateReduction(
5292     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5293     const VarDecl *RHSVar,
5294     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5295                                   const Expr *, const Expr *)> &RedOpGen,
5296     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5297     const Expr *UpExpr = nullptr) {
5298   // Perform element-by-element initialization.
5299   QualType ElementTy;
5300   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5301   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5302 
5303   // Drill down to the base element type on both arrays.
5304   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5305   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5306 
5307   llvm::Value *RHSBegin = RHSAddr.getPointer();
5308   llvm::Value *LHSBegin = LHSAddr.getPointer();
5309   // Cast from pointer to array type to pointer to single element.
5310   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5311   // The basic structure here is a while-do loop.
5312   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5313   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5314   llvm::Value *IsEmpty =
5315       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5316   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5317 
5318   // Enter the loop body, making that address the current address.
5319   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5320   CGF.EmitBlock(BodyBB);
5321 
5322   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5323 
5324   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5325       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5326   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5327   Address RHSElementCurrent =
5328       Address(RHSElementPHI,
5329               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5330 
5331   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5332       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5333   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5334   Address LHSElementCurrent =
5335       Address(LHSElementPHI,
5336               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5337 
5338   // Emit copy.
5339   CodeGenFunction::OMPPrivateScope Scope(CGF);
5340   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5341   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5342   Scope.Privatize();
5343   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5344   Scope.ForceCleanup();
5345 
5346   // Shift the address forward by one element.
5347   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5348       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5349   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5350       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5351   // Check whether we've reached the end.
5352   llvm::Value *Done =
5353       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5354   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5355   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5356   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5357 
5358   // Done.
5359   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5360 }
5361 
5362 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5363 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5364 /// UDR combiner function.
5365 static void emitReductionCombiner(CodeGenFunction &CGF,
5366                                   const Expr *ReductionOp) {
5367   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5368     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5369       if (const auto *DRE =
5370               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5371         if (const auto *DRD =
5372                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5373           std::pair<llvm::Function *, llvm::Function *> Reduction =
5374               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5375           RValue Func = RValue::get(Reduction.first);
5376           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5377           CGF.EmitIgnoredExpr(ReductionOp);
5378           return;
5379         }
5380   CGF.EmitIgnoredExpr(ReductionOp);
5381 }
5382 
5383 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5384     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5385     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5386     ArrayRef<const Expr *> ReductionOps) {
5387   ASTContext &C = CGM.getContext();
5388 
5389   // void reduction_func(void *LHSArg, void *RHSArg);
5390   FunctionArgList Args;
5391   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5392                            ImplicitParamDecl::Other);
5393   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5394                            ImplicitParamDecl::Other);
5395   Args.push_back(&LHSArg);
5396   Args.push_back(&RHSArg);
5397   const auto &CGFI =
5398       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5399   std::string Name = getName({"omp", "reduction", "reduction_func"});
5400   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5401                                     llvm::GlobalValue::InternalLinkage, Name,
5402                                     &CGM.getModule());
5403   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5404   Fn->setDoesNotRecurse();
5405   CodeGenFunction CGF(CGM);
5406   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5407 
5408   // Dst = (void*[n])(LHSArg);
5409   // Src = (void*[n])(RHSArg);
5410   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5411       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5412       ArgsType), CGF.getPointerAlign());
5413   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5414       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5415       ArgsType), CGF.getPointerAlign());
5416 
5417   //  ...
5418   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5419   //  ...
5420   CodeGenFunction::OMPPrivateScope Scope(CGF);
5421   auto IPriv = Privates.begin();
5422   unsigned Idx = 0;
5423   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5424     const auto *RHSVar =
5425         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5426     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5427       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5428     });
5429     const auto *LHSVar =
5430         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5431     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5432       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5433     });
5434     QualType PrivTy = (*IPriv)->getType();
5435     if (PrivTy->isVariablyModifiedType()) {
5436       // Get array size and emit VLA type.
5437       ++Idx;
5438       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5439       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5440       const VariableArrayType *VLA =
5441           CGF.getContext().getAsVariableArrayType(PrivTy);
5442       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5443       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5444           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5445       CGF.EmitVariablyModifiedType(PrivTy);
5446     }
5447   }
5448   Scope.Privatize();
5449   IPriv = Privates.begin();
5450   auto ILHS = LHSExprs.begin();
5451   auto IRHS = RHSExprs.begin();
5452   for (const Expr *E : ReductionOps) {
5453     if ((*IPriv)->getType()->isArrayType()) {
5454       // Emit reduction for array section.
5455       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5456       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5457       EmitOMPAggregateReduction(
5458           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5459           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5460             emitReductionCombiner(CGF, E);
5461           });
5462     } else {
5463       // Emit reduction for array subscript or single variable.
5464       emitReductionCombiner(CGF, E);
5465     }
5466     ++IPriv;
5467     ++ILHS;
5468     ++IRHS;
5469   }
5470   Scope.ForceCleanup();
5471   CGF.FinishFunction();
5472   return Fn;
5473 }
5474 
5475 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5476                                                   const Expr *ReductionOp,
5477                                                   const Expr *PrivateRef,
5478                                                   const DeclRefExpr *LHS,
5479                                                   const DeclRefExpr *RHS) {
5480   if (PrivateRef->getType()->isArrayType()) {
5481     // Emit reduction for array section.
5482     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5483     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5484     EmitOMPAggregateReduction(
5485         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5486         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5487           emitReductionCombiner(CGF, ReductionOp);
5488         });
5489   } else {
5490     // Emit reduction for array subscript or single variable.
5491     emitReductionCombiner(CGF, ReductionOp);
5492   }
5493 }
5494 
/// Emits the combining code for the reduction items of an OpenMP construct.
///
/// With Options.SimpleReduction set, only the plain sequence of combiner
/// operations is emitted. Otherwise the full scheme sketched in the comment
/// at the top of the body is generated: a list of pointers to the RHS items
/// is built, a reduce function is emitted, __kmpc_reduce{_nowait} is called
/// and its result selects between the non-atomic (case 1) and the atomic
/// (case 2) way of combining.
/// \param CGF Function in which the reduction code is emitted; nothing is
/// emitted if it has no insert point.
/// \param Loc Source location used for the runtime calls.
/// \param Privates Expressions whose types tell whether an item is an array
/// and/or variably modified.
/// \param LHSExprs References (DeclRefExprs) to the LHS helper variables.
/// \param RHSExprs References (DeclRefExprs) to the RHS helper variables.
/// \param ReductionOps Combiner expression of every reduction item.
/// \param Options Supplies the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls at all: emit the combiners back to back inside one
    // cleanup scope and finish.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one slot per item plus one extra slot for every variably
  // modified item, which is used to pass the array size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in the list; it runs ahead of I whenever a size slot has
  // been written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Store the address of the RHS item as void *.
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // NOTE: this local Size (the element count as an llvm::Value) shadows
      // the outer Size (the number of slots in the list).
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size travels through the pointer list as an integer converted to
      // void *.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note that <n> is the number of reduction items (RHSExprs.size()), not the
  // number of slots in RedList, while sizeof(RedList) covers all slots.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the plain (non-atomic) combiners of all items.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action has no enter call (nullptr); only the matching
  // __kmpc_end_reduce{_nowait} call is attached to the region.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Body of case 2: every item is combined either with an atomic update (when
  // the combiner has the form 'x = <update>') or inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose the combiner: XExpr is the assigned-to expression, UpExpr
      // the whole update expression and EExpr the right operand of the
      // binary operation found in the update. BO_Comma is the initial value
      // of BO and stays in place if no binary operation is found.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: inside this 'if' the name BO denotes the BinaryOperator node
      // and shadows the opcode variable declared above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits the atomic update of one item (or of one array element).
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback below computes the updated value from a given value
          // of X: the value is stored into a temporary, the LHS variable is
          // privatized to that temporary and UpExpr is evaluated.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The combiner is not an assignment, so it is emitted unchanged
        // inside a critical region named "atomic_reduction".
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    // Action is local to this block, so the region is emitted here, while
    // the action is still alive.
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5801 
5802 /// Generates unique name for artificial threadprivate variables.
5803 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5804 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5805                                       const Expr *Ref) {
5806   SmallString<256> Buffer;
5807   llvm::raw_svector_ostream Out(Buffer);
5808   const clang::DeclRefExpr *DE;
5809   const VarDecl *D = ::getBaseDecl(Ref, DE);
5810   if (!D)
5811     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5812   D = D->getCanonicalDecl();
5813   std::string Name = CGM.getOpenMPRuntime().getName(
5814       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5815   Out << Prefix << Name << "_"
5816       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5817   return std::string(Out.str());
5818 }
5819 
5820 /// Emits reduction initializer function:
5821 /// \code
5822 /// void @.red_init(void* %arg, void* %orig) {
5823 /// %0 = bitcast void* %arg to <type>*
5824 /// store <type> <init>, <type>* %0
5825 /// ret void
5826 /// }
5827 /// \endcode
5828 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5829                                            SourceLocation Loc,
5830                                            ReductionCodeGen &RCG, unsigned N) {
5831   ASTContext &C = CGM.getContext();
5832   QualType VoidPtrTy = C.VoidPtrTy;
5833   VoidPtrTy.addRestrict();
5834   FunctionArgList Args;
5835   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5836                           ImplicitParamDecl::Other);
5837   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5838                               ImplicitParamDecl::Other);
5839   Args.emplace_back(&Param);
5840   Args.emplace_back(&ParamOrig);
5841   const auto &FnInfo =
5842       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5843   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5844   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5845   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5846                                     Name, &CGM.getModule());
5847   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5848   Fn->setDoesNotRecurse();
5849   CodeGenFunction CGF(CGM);
5850   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5851   Address PrivateAddr = CGF.EmitLoadOfPointer(
5852       CGF.GetAddrOfLocalVar(&Param),
5853       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5854   llvm::Value *Size = nullptr;
5855   // If the size of the reduction item is non-constant, load it from global
5856   // threadprivate variable.
5857   if (RCG.getSizes(N).second) {
5858     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5859         CGF, CGM.getContext().getSizeType(),
5860         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5861     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5862                                 CGM.getContext().getSizeType(), Loc);
5863   }
5864   RCG.emitAggregateType(CGF, N, Size);
5865   LValue OrigLVal;
5866   // If initializer uses initializer from declare reduction construct, emit a
5867   // pointer to the address of the original reduction item (reuired by reduction
5868   // initializer)
5869   if (RCG.usesReductionInitializer(N)) {
5870     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5871     SharedAddr = CGF.EmitLoadOfPointer(
5872         SharedAddr,
5873         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5874     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5875   } else {
5876     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5877         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5878         CGM.getContext().VoidPtrTy);
5879   }
5880   // Emit the initializer:
5881   // %0 = bitcast void* %arg to <type>*
5882   // store <type> <init>, <type>* %0
5883   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5884                          [](CodeGenFunction &) { return false; });
5885   CGF.FinishFunction();
5886   return Fn;
5887 }
5888 
5889 /// Emits reduction combiner function:
5890 /// \code
5891 /// void @.red_comb(void* %arg0, void* %arg1) {
5892 /// %lhs = bitcast void* %arg0 to <type>*
5893 /// %rhs = bitcast void* %arg1 to <type>*
5894 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5895 /// store <type> %2, <type>* %lhs
5896 /// ret void
5897 /// }
5898 /// \endcode
5899 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5900                                            SourceLocation Loc,
5901                                            ReductionCodeGen &RCG, unsigned N,
5902                                            const Expr *ReductionOp,
5903                                            const Expr *LHS, const Expr *RHS,
5904                                            const Expr *PrivateRef) {
5905   ASTContext &C = CGM.getContext();
5906   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5907   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5908   FunctionArgList Args;
5909   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5910                                C.VoidPtrTy, ImplicitParamDecl::Other);
5911   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5912                             ImplicitParamDecl::Other);
5913   Args.emplace_back(&ParamInOut);
5914   Args.emplace_back(&ParamIn);
5915   const auto &FnInfo =
5916       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5917   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5918   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5919   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5920                                     Name, &CGM.getModule());
5921   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5922   Fn->setDoesNotRecurse();
5923   CodeGenFunction CGF(CGM);
5924   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5925   llvm::Value *Size = nullptr;
5926   // If the size of the reduction item is non-constant, load it from global
5927   // threadprivate variable.
5928   if (RCG.getSizes(N).second) {
5929     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5930         CGF, CGM.getContext().getSizeType(),
5931         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5932     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5933                                 CGM.getContext().getSizeType(), Loc);
5934   }
5935   RCG.emitAggregateType(CGF, N, Size);
5936   // Remap lhs and rhs variables to the addresses of the function arguments.
5937   // %lhs = bitcast void* %arg0 to <type>*
5938   // %rhs = bitcast void* %arg1 to <type>*
5939   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5940   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5941     // Pull out the pointer to the variable.
5942     Address PtrAddr = CGF.EmitLoadOfPointer(
5943         CGF.GetAddrOfLocalVar(&ParamInOut),
5944         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5945     return CGF.Builder.CreateElementBitCast(
5946         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5947   });
5948   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5949     // Pull out the pointer to the variable.
5950     Address PtrAddr = CGF.EmitLoadOfPointer(
5951         CGF.GetAddrOfLocalVar(&ParamIn),
5952         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5953     return CGF.Builder.CreateElementBitCast(
5954         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5955   });
5956   PrivateScope.Privatize();
5957   // Emit the combiner body:
5958   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5959   // store <type> %2, <type>* %lhs
5960   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5961       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5962       cast<DeclRefExpr>(RHS));
5963   CGF.FinishFunction();
5964   return Fn;
5965 }
5966 
5967 /// Emits reduction finalizer function:
5968 /// \code
5969 /// void @.red_fini(void* %arg) {
5970 /// %0 = bitcast void* %arg to <type>*
5971 /// <destroy>(<type>* %0)
5972 /// ret void
5973 /// }
5974 /// \endcode
5975 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5976                                            SourceLocation Loc,
5977                                            ReductionCodeGen &RCG, unsigned N) {
5978   if (!RCG.needCleanups(N))
5979     return nullptr;
5980   ASTContext &C = CGM.getContext();
5981   FunctionArgList Args;
5982   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5983                           ImplicitParamDecl::Other);
5984   Args.emplace_back(&Param);
5985   const auto &FnInfo =
5986       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5987   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5988   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5989   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5990                                     Name, &CGM.getModule());
5991   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5992   Fn->setDoesNotRecurse();
5993   CodeGenFunction CGF(CGM);
5994   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5995   Address PrivateAddr = CGF.EmitLoadOfPointer(
5996       CGF.GetAddrOfLocalVar(&Param),
5997       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5998   llvm::Value *Size = nullptr;
5999   // If the size of the reduction item is non-constant, load it from global
6000   // threadprivate variable.
6001   if (RCG.getSizes(N).second) {
6002     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6003         CGF, CGM.getContext().getSizeType(),
6004         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6005     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6006                                 CGM.getContext().getSizeType(), Loc);
6007   }
6008   RCG.emitAggregateType(CGF, N, Size);
6009   // Emit the finalizer body:
6010   // <destroy>(<type>* %0)
6011   RCG.emitCleanups(CGF, N, PrivateAddr);
6012   CGF.FinishFunction(Loc);
6013   return Fn;
6014 }
6015 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Fills a temporary array with one kmp_taskred_input_t descriptor per
  // reduction variable in Data and passes it to the runtime. Returns the
  // result of the runtime call, or nullptr when there is no insert point or
  // no reduction variables.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields below are added in exactly the order listed above.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is emitted as an unsigned 32-bit integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // Emits the shared and original lvalues that are queried below.
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    // getSizes() yields a pair: the first element is stored below as the size
    // in chars; the second is non-null only for items of non-constant size.
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = (size_t)SizeValInChars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction() returns nullptr when the item needs no
    // cleanups; a null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Reductions with the task modifier use a different runtime entry point
  // that additionally takes the source location and the worksharing flag.
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6144 
6145 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6146                                             SourceLocation Loc,
6147                                             bool IsWorksharingReduction) {
6148   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6149   // is_ws, int num, void *data);
6150   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6151   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6152                                                 CGM.IntTy, /*isSigned=*/true);
6153   llvm::Value *Args[] = {IdentTLoc, GTid,
6154                          llvm::ConstantInt::get(CGM.IntTy,
6155                                                 IsWorksharingReduction ? 1 : 0,
6156                                                 /*isSigned=*/true)};
6157   (void)CGF.EmitRuntimeCall(
6158       OMPBuilder.getOrCreateRuntimeFunction(
6159           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6160       Args);
6161 }
6162 
6163 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6164                                               SourceLocation Loc,
6165                                               ReductionCodeGen &RCG,
6166                                               unsigned N) {
6167   auto Sizes = RCG.getSizes(N);
6168   // Emit threadprivate global variable if the type is non-constant
6169   // (Sizes.second = nullptr).
6170   if (Sizes.second) {
6171     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6172                                                      /*isSigned=*/false);
6173     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6174         CGF, CGM.getContext().getSizeType(),
6175         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6176     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6177   }
6178 }
6179 
6180 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6181                                               SourceLocation Loc,
6182                                               llvm::Value *ReductionsPtr,
6183                                               LValue SharedLVal) {
6184   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6185   // *d);
6186   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6187                                                    CGM.IntTy,
6188                                                    /*isSigned=*/true),
6189                          ReductionsPtr,
6190                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6191                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6192   return Address(
6193       CGF.EmitRuntimeCall(
6194           OMPBuilder.getOrCreateRuntimeFunction(
6195               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6196           Args),
6197       SharedLVal.getAlignment());
6198 }
6199 
6200 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6201                                        SourceLocation Loc) {
6202   if (!CGF.HaveInsertPoint())
6203     return;
6204 
6205   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6206     OMPBuilder.createTaskwait(CGF.Builder);
6207   } else {
6208     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6209     // global_tid);
6210     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6211     // Ignore return result until untied tasks are supported.
6212     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6213                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6214                         Args);
6215   }
6216 
6217   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6218     Region->emitUntiedSwitch(CGF);
6219 }
6220 
6221 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6222                                            OpenMPDirectiveKind InnerKind,
6223                                            const RegionCodeGenTy &CodeGen,
6224                                            bool HasCancel) {
6225   if (!CGF.HaveInsertPoint())
6226     return;
6227   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6228                                  InnerKind != OMPD_critical &&
6229                                      InnerKind != OMPD_master);
6230   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6231 }
6232 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The numeric
/// values are emitted directly as 32-bit integer constants.
enum RTCancelKind {
  /// Initial value only; getCancellationKind() never returns it.
  CancelNoreq = 0,
  /// Used for a cancel region of kind OMPD_parallel.
  CancelParallel = 1,
  /// Used for a cancel region of kind OMPD_for.
  CancelLoop = 2,
  /// Used for a cancel region of kind OMPD_sections.
  CancelSections = 3,
  /// Used for a cancel region of kind OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6242 
6243 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6244   RTCancelKind CancelKind = CancelNoreq;
6245   if (CancelRegion == OMPD_parallel)
6246     CancelKind = CancelParallel;
6247   else if (CancelRegion == OMPD_for)
6248     CancelKind = CancelLoop;
6249   else if (CancelRegion == OMPD_sections)
6250     CancelKind = CancelSections;
6251   else {
6252     assert(CancelRegion == OMPD_taskgroup);
6253     CancelKind = CancelTaskgroup;
6254   }
6255   return CancelKind;
6256 }
6257 
6258 void CGOpenMPRuntime::emitCancellationPointCall(
6259     CodeGenFunction &CGF, SourceLocation Loc,
6260     OpenMPDirectiveKind CancelRegion) {
6261   if (!CGF.HaveInsertPoint())
6262     return;
6263   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6264   // global_tid, kmp_int32 cncl_kind);
6265   if (auto *OMPRegionInfo =
6266           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6267     // For 'cancellation point taskgroup', the task region info may not have a
6268     // cancel. This may instead happen in another adjacent task.
6269     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6270       llvm::Value *Args[] = {
6271           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6272           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6273       // Ignore return result until untied tasks are supported.
6274       llvm::Value *Result = CGF.EmitRuntimeCall(
6275           OMPBuilder.getOrCreateRuntimeFunction(
6276               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6277           Args);
6278       // if (__kmpc_cancellationpoint()) {
6279       //   exit from construct;
6280       // }
6281       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6282       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6283       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6284       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6285       CGF.EmitBlock(ExitBB);
6286       // exit from construct;
6287       CodeGenFunction::JumpDest CancelDest =
6288           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6289       CGF.EmitBranchThroughCleanup(CancelDest);
6290       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6291     }
6292   }
6293 }
6294 
6295 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6296                                      const Expr *IfCond,
6297                                      OpenMPDirectiveKind CancelRegion) {
6298   if (!CGF.HaveInsertPoint())
6299     return;
6300   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6301   // kmp_int32 cncl_kind);
6302   auto &M = CGM.getModule();
6303   if (auto *OMPRegionInfo =
6304           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6305     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6306                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6307       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6308       llvm::Value *Args[] = {
6309           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6310           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6311       // Ignore return result until untied tasks are supported.
6312       llvm::Value *Result = CGF.EmitRuntimeCall(
6313           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6314       // if (__kmpc_cancel()) {
6315       //   exit from construct;
6316       // }
6317       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6318       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6319       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6320       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6321       CGF.EmitBlock(ExitBB);
6322       // exit from construct;
6323       CodeGenFunction::JumpDest CancelDest =
6324           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6325       CGF.EmitBranchThroughCleanup(CancelDest);
6326       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6327     };
6328     if (IfCond) {
6329       emitIfClause(CGF, IfCond, ThenGen,
6330                    [](CodeGenFunction &, PrePostActionTy &) {});
6331     } else {
6332       RegionCodeGenTy ThenRCG(ThenGen);
6333       ThenRCG(CGF);
6334     }
6335   }
6336 }
6337 
6338 namespace {
6339 /// Cleanup action for uses_allocators support.
6340 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6341   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6342 
6343 public:
6344   OMPUsesAllocatorsActionTy(
6345       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6346       : Allocators(Allocators) {}
6347   void Enter(CodeGenFunction &CGF) override {
6348     if (!CGF.HaveInsertPoint())
6349       return;
6350     for (const auto &AllocatorData : Allocators) {
6351       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6352           CGF, AllocatorData.first, AllocatorData.second);
6353     }
6354   }
6355   void Exit(CodeGenFunction &CGF) override {
6356     if (!CGF.HaveInsertPoint())
6357       return;
6358     for (const auto &AllocatorData : Allocators) {
6359       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6360                                                         AllocatorData.first);
6361     }
6362   }
6363 };
6364 } // namespace
6365 
6366 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6367     const OMPExecutableDirective &D, StringRef ParentName,
6368     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6369     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6370   assert(!ParentName.empty() && "Invalid target region parent name!");
6371   HasEmittedTargetRegion = true;
6372   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6373   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6374     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6375       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6376       if (!D.AllocatorTraits)
6377         continue;
6378       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6379     }
6380   }
6381   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6382   CodeGen.setAction(UsesAllocatorAction);
6383   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6384                                    IsOffloadEntry, CodeGen);
6385 }
6386 
6387 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6388                                              const Expr *Allocator,
6389                                              const Expr *AllocatorTraits) {
6390   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6391   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6392   // Use default memspace handle.
6393   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6394   llvm::Value *NumTraits = llvm::ConstantInt::get(
6395       CGF.IntTy, cast<ConstantArrayType>(
6396                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6397                      ->getSize()
6398                      .getLimitedValue());
6399   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6400   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6401       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6402   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6403                                            AllocatorTraitsLVal.getBaseInfo(),
6404                                            AllocatorTraitsLVal.getTBAAInfo());
6405   llvm::Value *Traits =
6406       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6407 
6408   llvm::Value *AllocatorVal =
6409       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6410                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6411                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6412   // Store to allocator.
6413   CGF.EmitVarDecl(*cast<VarDecl>(
6414       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6415   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6416   AllocatorVal =
6417       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6418                                Allocator->getType(), Allocator->getExprLoc());
6419   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6420 }
6421 
6422 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6423                                              const Expr *Allocator) {
6424   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6425   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6426   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6427   llvm::Value *AllocatorVal =
6428       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6429   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6430                                           CGF.getContext().VoidPtrTy,
6431                                           Allocator->getExprLoc());
6432   (void)CGF.EmitRuntimeCall(
6433       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6434                                             OMPRTL___kmpc_destroy_allocator),
6435       {ThreadId, AllocatorVal});
6436 }
6437 
6438 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6439     const OMPExecutableDirective &D, StringRef ParentName,
6440     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6441     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6442   // Create a unique name for the entry function using the source location
6443   // information of the current target region. The name will be something like:
6444   //
6445   // __omp_offloading_DD_FFFF_PP_lBB
6446   //
6447   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6448   // mangled name of the function that encloses the target region and BB is the
6449   // line number of the target region.
6450 
6451   unsigned DeviceID;
6452   unsigned FileID;
6453   unsigned Line;
6454   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6455                            Line);
6456   SmallString<64> EntryFnName;
6457   {
6458     llvm::raw_svector_ostream OS(EntryFnName);
6459     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6460        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6461   }
6462 
6463   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6464 
6465   CodeGenFunction CGF(CGM, true);
6466   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6467   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6468 
6469   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6470 
6471   // If this target outline function is not an offload entry, we don't need to
6472   // register it.
6473   if (!IsOffloadEntry)
6474     return;
6475 
6476   // The target region ID is used by the runtime library to identify the current
6477   // target region, so it only has to be unique and not necessarily point to
6478   // anything. It could be the pointer to the outlined function that implements
6479   // the target region, but we aren't using that so that the compiler doesn't
6480   // need to keep that, and could therefore inline the host function if proven
6481   // worthwhile during optimization. In the other hand, if emitting code for the
6482   // device, the ID has to be the function address so that it can retrieved from
6483   // the offloading entry and launched by the runtime library. We also mark the
6484   // outlined function to have external linkage in case we are emitting code for
6485   // the device, because these functions will be entry points to the device.
6486 
6487   if (CGM.getLangOpts().OpenMPIsDevice) {
6488     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6489     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6490     OutlinedFn->setDSOLocal(false);
6491     if (CGM.getTriple().isAMDGCN())
6492       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6493   } else {
6494     std::string Name = getName({EntryFnName, "region_id"});
6495     OutlinedFnID = new llvm::GlobalVariable(
6496         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6497         llvm::GlobalValue::WeakAnyLinkage,
6498         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6499   }
6500 
6501   // Register the information for the entry associated with this target region.
6502   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6503       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6504       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6505 }
6506 
6507 /// Checks if the expression is constant or does not have non-trivial function
6508 /// calls.
6509 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6510   // We can skip constant expressions.
6511   // We can skip expressions with trivial calls or simple expressions.
6512   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6513           !E->hasNonTrivialCall(Ctx)) &&
6514          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6515 }
6516 
6517 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6518                                                     const Stmt *Body) {
6519   const Stmt *Child = Body->IgnoreContainers();
6520   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6521     Child = nullptr;
6522     for (const Stmt *S : C->body()) {
6523       if (const auto *E = dyn_cast<Expr>(S)) {
6524         if (isTrivial(Ctx, E))
6525           continue;
6526       }
6527       // Some of the statements can be ignored.
6528       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6529           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6530         continue;
6531       // Analyze declarations.
6532       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6533         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6534               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6535                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6536                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6537                   isa<UsingDirectiveDecl>(D) ||
6538                   isa<OMPDeclareReductionDecl>(D) ||
6539                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6540                 return true;
6541               const auto *VD = dyn_cast<VarDecl>(D);
6542               if (!VD)
6543                 return false;
6544               return VD->isConstexpr() ||
6545                      ((VD->getType().isTrivialType(Ctx) ||
6546                        VD->getType()->isReferenceType()) &&
6547                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6548             }))
6549           continue;
6550       }
6551       // Found multiple children - cannot get the one child only.
6552       if (Child)
6553         return nullptr;
6554       Child = S;
6555     }
6556     if (Child)
6557       Child = Child->IgnoreContainers();
6558   }
6559   return Child;
6560 }
6561 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// When a teams construct is present but has no num_teams clause, the
/// constant 0 is emitted instead.
///
/// Otherwise, return nullptr. This only happens for a plain 'target' whose
/// body does not reduce to a single nested OpenMP executable directive.
///
/// Must only be called for host code generation and only with a target
/// execution directive; any other directive kind reaches llvm_unreachable.
/// All non-null results are 32-bit integer values.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single nested directive inside the captured
    // body and derive the team count from it.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // The num_teams expression belongs to the nested directive, so it
          // is emitted with the captured-statement info of the target region
          // installed for the duration of this scope.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested directive.
      return Bld.getInt32(0);
    }
    // No single nested executable directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target-teams directives carry the num_teams clause themselves.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      // Cleanups created while emitting the expression are run when this
      // scope ends.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    // No num_teams clause on the combined directive.
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct is associated with these directives: one team.
    return Bld.getInt32(1);
  // None of the directives below is handled here; they all fall out of the
  // switch into the llvm_unreachable that follows it.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6695 
6696 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6697                                   llvm::Value *DefaultThreadLimitVal) {
6698   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6699       CGF.getContext(), CS->getCapturedStmt());
6700   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6701     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6702       llvm::Value *NumThreads = nullptr;
6703       llvm::Value *CondVal = nullptr;
6704       // Handle if clause. If if clause present, the number of threads is
6705       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6706       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6707         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6708         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6709         const OMPIfClause *IfClause = nullptr;
6710         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6711           if (C->getNameModifier() == OMPD_unknown ||
6712               C->getNameModifier() == OMPD_parallel) {
6713             IfClause = C;
6714             break;
6715           }
6716         }
6717         if (IfClause) {
6718           const Expr *Cond = IfClause->getCondition();
6719           bool Result;
6720           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6721             if (!Result)
6722               return CGF.Builder.getInt32(1);
6723           } else {
6724             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6725             if (const auto *PreInit =
6726                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6727               for (const auto *I : PreInit->decls()) {
6728                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6729                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6730                 } else {
6731                   CodeGenFunction::AutoVarEmission Emission =
6732                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6733                   CGF.EmitAutoVarCleanups(Emission);
6734                 }
6735               }
6736             }
6737             CondVal = CGF.EvaluateExprAsBool(Cond);
6738           }
6739         }
6740       }
6741       // Check the value of num_threads clause iff if clause was not specified
6742       // or is not evaluated to false.
6743       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6744         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6745         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6746         const auto *NumThreadsClause =
6747             Dir->getSingleClause<OMPNumThreadsClause>();
6748         CodeGenFunction::LexicalScope Scope(
6749             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6750         if (const auto *PreInit =
6751                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6752           for (const auto *I : PreInit->decls()) {
6753             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6754               CGF.EmitVarDecl(cast<VarDecl>(*I));
6755             } else {
6756               CodeGenFunction::AutoVarEmission Emission =
6757                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6758               CGF.EmitAutoVarCleanups(Emission);
6759             }
6760           }
6761         }
6762         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6763         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6764                                                /*isSigned=*/false);
6765         if (DefaultThreadLimitVal)
6766           NumThreads = CGF.Builder.CreateSelect(
6767               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6768               DefaultThreadLimitVal, NumThreads);
6769       } else {
6770         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6771                                            : CGF.Builder.getInt32(0);
6772       }
6773       // Process condition of the if clause.
6774       if (CondVal) {
6775         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6776                                               CGF.Builder.getInt32(1));
6777       }
6778       return NumThreads;
6779     }
6780     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6781       return CGF.Builder.getInt32(1);
6782     return DefaultThreadLimitVal;
6783   }
6784   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6785                                : CGF.Builder.getInt32(0);
6786 }
6787 
6788 /// Emit the number of threads for a target directive.  Inspect the
6789 /// thread_limit clause associated with a teams construct combined or closely
6790 /// nested with the target directive.
6791 ///
6792 /// Emit the num_threads clause for directives such as 'target parallel' that
6793 /// have no associated teams construct.
6794 ///
6795 /// Otherwise, return nullptr.
6796 static llvm::Value *
6797 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6798                                  const OMPExecutableDirective &D) {
6799   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6800          "Clauses associated with the teams directive expected to be emitted "
6801          "only for the host!");
6802   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6803   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6804          "Expected target-based executable directive.");
6805   CGBuilderTy &Bld = CGF.Builder;
6806   llvm::Value *ThreadLimitVal = nullptr;
6807   llvm::Value *NumThreadsVal = nullptr;
6808   switch (DirectiveKind) {
6809   case OMPD_target: {
6810     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6811     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6812       return NumThreads;
6813     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6814         CGF.getContext(), CS->getCapturedStmt());
6815     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6816       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6817         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6818         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6819         const auto *ThreadLimitClause =
6820             Dir->getSingleClause<OMPThreadLimitClause>();
6821         CodeGenFunction::LexicalScope Scope(
6822             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6823         if (const auto *PreInit =
6824                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6825           for (const auto *I : PreInit->decls()) {
6826             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6827               CGF.EmitVarDecl(cast<VarDecl>(*I));
6828             } else {
6829               CodeGenFunction::AutoVarEmission Emission =
6830                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6831               CGF.EmitAutoVarCleanups(Emission);
6832             }
6833           }
6834         }
6835         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6836             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6837         ThreadLimitVal =
6838             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6839       }
6840       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6841           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6842         CS = Dir->getInnermostCapturedStmt();
6843         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6844             CGF.getContext(), CS->getCapturedStmt());
6845         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6846       }
6847       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6848           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6849         CS = Dir->getInnermostCapturedStmt();
6850         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6851           return NumThreads;
6852       }
6853       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6854         return Bld.getInt32(1);
6855     }
6856     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6857   }
6858   case OMPD_target_teams: {
6859     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6860       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6861       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6862       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6863           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6864       ThreadLimitVal =
6865           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6866     }
6867     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6868     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6869       return NumThreads;
6870     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6871         CGF.getContext(), CS->getCapturedStmt());
6872     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6873       if (Dir->getDirectiveKind() == OMPD_distribute) {
6874         CS = Dir->getInnermostCapturedStmt();
6875         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6876           return NumThreads;
6877       }
6878     }
6879     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6880   }
6881   case OMPD_target_teams_distribute:
6882     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6883       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6884       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6885       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6886           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6887       ThreadLimitVal =
6888           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6889     }
6890     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6891   case OMPD_target_parallel:
6892   case OMPD_target_parallel_for:
6893   case OMPD_target_parallel_for_simd:
6894   case OMPD_target_teams_distribute_parallel_for:
6895   case OMPD_target_teams_distribute_parallel_for_simd: {
6896     llvm::Value *CondVal = nullptr;
6897     // Handle if clause. If if clause present, the number of threads is
6898     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6899     if (D.hasClausesOfKind<OMPIfClause>()) {
6900       const OMPIfClause *IfClause = nullptr;
6901       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6902         if (C->getNameModifier() == OMPD_unknown ||
6903             C->getNameModifier() == OMPD_parallel) {
6904           IfClause = C;
6905           break;
6906         }
6907       }
6908       if (IfClause) {
6909         const Expr *Cond = IfClause->getCondition();
6910         bool Result;
6911         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6912           if (!Result)
6913             return Bld.getInt32(1);
6914         } else {
6915           CodeGenFunction::RunCleanupsScope Scope(CGF);
6916           CondVal = CGF.EvaluateExprAsBool(Cond);
6917         }
6918       }
6919     }
6920     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6921       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6922       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6923       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6924           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6925       ThreadLimitVal =
6926           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6927     }
6928     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6929       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6930       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6931       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6932           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6933       NumThreadsVal =
6934           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6935       ThreadLimitVal = ThreadLimitVal
6936                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6937                                                                 ThreadLimitVal),
6938                                               NumThreadsVal, ThreadLimitVal)
6939                            : NumThreadsVal;
6940     }
6941     if (!ThreadLimitVal)
6942       ThreadLimitVal = Bld.getInt32(0);
6943     if (CondVal)
6944       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6945     return ThreadLimitVal;
6946   }
6947   case OMPD_target_teams_distribute_simd:
6948   case OMPD_target_simd:
6949     return Bld.getInt32(1);
6950   case OMPD_parallel:
6951   case OMPD_for:
6952   case OMPD_parallel_for:
6953   case OMPD_parallel_master:
6954   case OMPD_parallel_sections:
6955   case OMPD_for_simd:
6956   case OMPD_parallel_for_simd:
6957   case OMPD_cancel:
6958   case OMPD_cancellation_point:
6959   case OMPD_ordered:
6960   case OMPD_threadprivate:
6961   case OMPD_allocate:
6962   case OMPD_task:
6963   case OMPD_simd:
6964   case OMPD_tile:
6965   case OMPD_sections:
6966   case OMPD_section:
6967   case OMPD_single:
6968   case OMPD_master:
6969   case OMPD_critical:
6970   case OMPD_taskyield:
6971   case OMPD_barrier:
6972   case OMPD_taskwait:
6973   case OMPD_taskgroup:
6974   case OMPD_atomic:
6975   case OMPD_flush:
6976   case OMPD_depobj:
6977   case OMPD_scan:
6978   case OMPD_teams:
6979   case OMPD_target_data:
6980   case OMPD_target_exit_data:
6981   case OMPD_target_enter_data:
6982   case OMPD_distribute:
6983   case OMPD_distribute_simd:
6984   case OMPD_distribute_parallel_for:
6985   case OMPD_distribute_parallel_for_simd:
6986   case OMPD_teams_distribute:
6987   case OMPD_teams_distribute_simd:
6988   case OMPD_teams_distribute_parallel_for:
6989   case OMPD_teams_distribute_parallel_for_simd:
6990   case OMPD_target_update:
6991   case OMPD_declare_simd:
6992   case OMPD_declare_variant:
6993   case OMPD_begin_declare_variant:
6994   case OMPD_end_declare_variant:
6995   case OMPD_declare_target:
6996   case OMPD_end_declare_target:
6997   case OMPD_declare_reduction:
6998   case OMPD_declare_mapper:
6999   case OMPD_taskloop:
7000   case OMPD_taskloop_simd:
7001   case OMPD_master_taskloop:
7002   case OMPD_master_taskloop_simd:
7003   case OMPD_parallel_master_taskloop:
7004   case OMPD_parallel_master_taskloop_simd:
7005   case OMPD_requires:
7006   case OMPD_unknown:
7007     break;
7008   default:
7009     break;
7010   }
7011   llvm_unreachable("Unsupported directive kind.");
7012 }
7013 
7014 namespace {
7015 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7016 
7017 // Utility to handle information from clauses associated with a given
7018 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7019 // It provides a convenient interface to obtain the information and generate
7020 // code for that information.
7021 class MappableExprsHandler {
7022 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// The underlying type is 64 bits wide: the low bits carry the individual
  /// flags and the 16 most significant bits form the OMP_MAP_MEMBER_OF field.
  /// The enum is marked as a bitmask enum so the enumerators can be combined
  /// with bitwise operators.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is reserved for compatibility with XLC and is therefore
    // intentionally left unused here.

    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in a target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7070 
7071   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7072   static unsigned getFlagMemberOffset() {
7073     unsigned Offset = 0;
7074     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7075          Remain = Remain >> 1)
7076       Offset++;
7077     return Offset;
7078   }
7079 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  ///
  /// It is a plain pair of a declaration and an optional expression; neither
  /// pointer is owned by this object.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// Build the info from a declaration and, optionally, the expression it
    /// was mapped through. The constructor is intentionally implicit, so a
    /// bare declaration pointer converts to a MappingExprInfo.
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    /// Return the mapped declaration (may be null).
    const ValueDecl *getMapDecl() const { return MapDecl; }
    /// Return the original map expression, or null if there is none.
    const Expr *getMapExpr() const { return MapExpr; }
  };
7096 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  ///
  /// Dereferencing the object with operator* yields the wrapped base pointer
  /// value.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    /// Wrap \p Ptr, optionally tagging it with the declaration that refers to
    /// it as a device pointer. The constructor is intentionally implicit, so a
    /// bare llvm::Value pointer converts to a BasePointerInfo.
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Return the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the associated device pointer declaration, or null.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Replace the associated device pointer declaration.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7113 
  // Array types used to collect mapping information. Each alias is a
  // SmallVector with four inline elements.

  /// Array of declaration/expression pairs describing mappings.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Array of base pointers with their optional device pointer declarations.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of emitted values.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of mapping flag sets.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// Array of mapper declarations.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Array of dimension counts.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// Array of value arrays, used for non-contiguous information.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7121 
7122   /// This structure contains combined information generated for mappable
7123   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7124   /// mappers, and non-contiguous information.
7125   struct MapCombinedInfoTy {
7126     struct StructNonContiguousInfo {
7127       bool IsNonContiguous = false;
7128       MapDimArrayTy Dims;
7129       MapNonContiguousArrayTy Offsets;
7130       MapNonContiguousArrayTy Counts;
7131       MapNonContiguousArrayTy Strides;
7132     };
7133     MapExprsArrayTy Exprs;
7134     MapBaseValuesArrayTy BasePointers;
7135     MapValuesArrayTy Pointers;
7136     MapValuesArrayTy Sizes;
7137     MapFlagsArrayTy Types;
7138     MapMappersArrayTy Mappers;
7139     StructNonContiguousInfo NonContigInfo;
7140 
7141     /// Append arrays in \a CurInfo.
7142     void append(MapCombinedInfoTy &CurInfo) {
7143       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7144       BasePointers.append(CurInfo.BasePointers.begin(),
7145                           CurInfo.BasePointers.end());
7146       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7147       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7148       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7149       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7150       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7151                                  CurInfo.NonContigInfo.Dims.end());
7152       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7153                                     CurInfo.NonContigInfo.Offsets.end());
7154       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7155                                    CurInfo.NonContigInfo.Counts.end());
7156       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7157                                     CurInfo.NonContigInfo.Strides.end());
7158     }
7159   };
7160 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// All addresses start out invalid and both field indices start at 0.
  struct StructRangeInfoTy {
    /// Mapping information collected for the struct.
    /// NOTE(review): how and when this is consumed is not visible in this
    /// part of the file.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address; presumably that of the enclosing struct (TODO confirm).
    Address Base = Address::invalid();
    /// Presumably the lower bound address of the mapped range (TODO confirm).
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7176 
7177 private:
  /// Bundle of everything recorded for one mappable-expression component
  /// list: the components themselves, the map type, the map and motion
  /// modifiers, whether a device pointer has to be returned, whether the map
  /// is implicit, and the optional mapper declaration and variable reference.
  ///
  /// A default-constructed MapInfo has an unknown map type, empty component
  /// and modifier lists, null pointers and all flags cleared.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Non-owning views; the referenced modifier storage must outlive this
    // object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    // NOTE(review): presumably set for use_device_addr entries as opposed to
    // use_device_ptr — confirm against the users of this struct.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Memberwise constructor; every argument is stored in the field of the
    /// same name.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7204 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  ///
  /// This record keeps what is needed to emit such an entry later.
  struct DeferredDevicePtrEntryTy {
    // NOTE(review): presumably the expression from the clause that names the
    // member — confirm at the point where entries are created.
    const Expr *IE = nullptr;
    // The declaration the deferred entry is for.
    const ValueDecl *VD = nullptr;
    // NOTE(review): presumably true for use_device_addr and false for
    // use_device_ptr — confirm at the point where entries are created.
    bool ForDeviceAddr = false;

    /// Memberwise constructor; every argument is stored in the field of the
    /// same name.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7217 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7220   llvm::PointerUnion<const OMPExecutableDirective *,
7221                      const OMPDeclareMapperDecl *>
7222       CurDir;
7223 
7224   /// Function the directive is being generated for.
7225   CodeGenFunction &CGF;
7226 
7227   /// Set of all first private variables in the current directive.
7228   /// bool data is set to true if the variable is implicitly marked as
7229   /// firstprivate, false otherwise.
7230   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7231 
7232   /// Map between device pointer declarations and their expression components.
7233   /// The key value for declarations in 'this' is null.
7234   llvm::DenseMap<
7235       const ValueDecl *,
7236       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7237       DevPointersMap;
7238 
7239   llvm::Value *getExprTypeSize(const Expr *E) const {
7240     QualType ExprTy = E->getType().getCanonicalType();
7241 
7242     // Calculate the size for array shaping expression.
7243     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7244       llvm::Value *Size =
7245           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7246       for (const Expr *SE : OAE->getDimensions()) {
7247         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7248         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7249                                       CGF.getContext().getSizeType(),
7250                                       SE->getExprLoc());
7251         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7252       }
7253       return Size;
7254     }
7255 
7256     // Reference types are ignored for mapping purposes.
7257     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7258       ExprTy = RefTy->getPointeeType().getCanonicalType();
7259 
7260     // Given that an array section is considered a built-in type, we need to
7261     // do the calculation based on the length of the section instead of relying
7262     // on CGF.getTypeSize(E->getType()).
7263     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7264       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7265                             OAE->getBase()->IgnoreParenImpCasts())
7266                             .getCanonicalType();
7267 
7268       // If there is no length associated with the expression and lower bound is
7269       // not specified too, that means we are using the whole length of the
7270       // base.
7271       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7272           !OAE->getLowerBound())
7273         return CGF.getTypeSize(BaseTy);
7274 
7275       llvm::Value *ElemSize;
7276       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7277         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7278       } else {
7279         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7280         assert(ATy && "Expecting array type if not a pointer type.");
7281         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7282       }
7283 
7284       // If we don't have a length at this point, that is because we have an
7285       // array section with a single element.
7286       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7287         return ElemSize;
7288 
7289       if (const Expr *LenExpr = OAE->getLength()) {
7290         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7291         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7292                                              CGF.getContext().getSizeType(),
7293                                              LenExpr->getExprLoc());
7294         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7295       }
7296       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7297              OAE->getLowerBound() && "expected array_section[lb:].");
7298       // Size = sizetype - lb * elemtype;
7299       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7300       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7301       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7302                                        CGF.getContext().getSizeType(),
7303                                        OAE->getLowerBound()->getExprLoc());
7304       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7305       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7306       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7307       LengthVal = CGF.Builder.CreateSelect(
7308           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7309       return LengthVal;
7310     }
7311     return CGF.getTypeSize(ExprTy);
7312   }
7313 
7314   /// Return the corresponding bits for a given map clause modifier. Add
7315   /// a flag marking the map as a pointer if requested. Add a flag marking the
7316   /// map as the first one of a series of maps that relate to the same map
7317   /// expression.
7318   OpenMPOffloadMappingFlags getMapTypeBits(
7319       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7320       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7321       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7322     OpenMPOffloadMappingFlags Bits =
7323         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7324     switch (MapType) {
7325     case OMPC_MAP_alloc:
7326     case OMPC_MAP_release:
7327       // alloc and release is the default behavior in the runtime library,  i.e.
7328       // if we don't pass any bits alloc/release that is what the runtime is
7329       // going to do. Therefore, we don't need to signal anything for these two
7330       // type modifiers.
7331       break;
7332     case OMPC_MAP_to:
7333       Bits |= OMP_MAP_TO;
7334       break;
7335     case OMPC_MAP_from:
7336       Bits |= OMP_MAP_FROM;
7337       break;
7338     case OMPC_MAP_tofrom:
7339       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7340       break;
7341     case OMPC_MAP_delete:
7342       Bits |= OMP_MAP_DELETE;
7343       break;
7344     case OMPC_MAP_unknown:
7345       llvm_unreachable("Unexpected map type!");
7346     }
7347     if (AddPtrFlag)
7348       Bits |= OMP_MAP_PTR_AND_OBJ;
7349     if (AddIsTargetParamFlag)
7350       Bits |= OMP_MAP_TARGET_PARAM;
7351     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7352         != MapModifiers.end())
7353       Bits |= OMP_MAP_ALWAYS;
7354     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7355         != MapModifiers.end())
7356       Bits |= OMP_MAP_CLOSE;
7357     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7358             MapModifiers.end() ||
7359         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7360             MotionModifiers.end())
7361       Bits |= OMP_MAP_PRESENT;
7362     if (IsNonContiguous)
7363       Bits |= OMP_MAP_NON_CONTIG;
7364     return Bits;
7365   }
7366 
7367   /// Return true if the provided expression is a final array section. A
7368   /// final array section, is one whose length can't be proved to be one.
7369   bool isFinalArraySectionExpression(const Expr *E) const {
7370     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7371 
7372     // It is not an array section and therefore not a unity-size one.
7373     if (!OASE)
7374       return false;
7375 
7376     // An array section with no colon always refer to a single element.
7377     if (OASE->getColonLocFirst().isInvalid())
7378       return false;
7379 
7380     const Expr *Length = OASE->getLength();
7381 
7382     // If we don't have a length we have to check if the array has size 1
7383     // for this dimension. Also, we should always expect a length if the
7384     // base type is pointer.
7385     if (!Length) {
7386       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7387                              OASE->getBase()->IgnoreParenImpCasts())
7388                              .getCanonicalType();
7389       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7390         return ATy->getSize().getSExtValue() != 1;
7391       // If we don't have a constant dimension length, we have to consider
7392       // the current section as having any size, so it is not necessarily
7393       // unitary. If it happen to be unity size, that's user fault.
7394       return true;
7395     }
7396 
7397     // Check if the length evaluates to 1.
7398     Expr::EvalResult Result;
7399     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7400       return true; // Can have more that size 1.
7401 
7402     llvm::APSInt ConstLength = Result.Val.getInt();
7403     return ConstLength.getSExtValue() != 1;
7404   }
7405 
7406   /// Generate the base pointers, section pointers, sizes, map type bits, and
7407   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7408   /// map type, map or motion modifiers, and expression components.
7409   /// \a IsFirstComponent should be set to true if the provided set of
7410   /// components is the first associated with a capture.
7411   void generateInfoForComponentList(
7412       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7413       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7414       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7415       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7416       bool IsFirstComponentList, bool IsImplicit,
7417       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7418       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7419       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7420           OverlappedElements = llvm::None) const {
7421     // The following summarizes what has to be generated for each map and the
7422     // types below. The generated information is expressed in this order:
7423     // base pointer, section pointer, size, flags
7424     // (to add to the ones that come from the map type and modifier).
7425     //
7426     // double d;
7427     // int i[100];
7428     // float *p;
7429     //
7430     // struct S1 {
7431     //   int i;
7432     //   float f[50];
7433     // }
7434     // struct S2 {
7435     //   int i;
7436     //   float f[50];
7437     //   S1 s;
7438     //   double *p;
7439     //   struct S2 *ps;
7440     // }
7441     // S2 s;
7442     // S2 *ps;
7443     //
7444     // map(d)
7445     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7446     //
7447     // map(i)
7448     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7449     //
7450     // map(i[1:23])
7451     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7452     //
7453     // map(p)
7454     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7455     //
7456     // map(p[1:24])
7457     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7458     // in unified shared memory mode or for local pointers
7459     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7460     //
7461     // map(s)
7462     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7463     //
7464     // map(s.i)
7465     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7466     //
7467     // map(s.s.f)
7468     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7469     //
7470     // map(s.p)
7471     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7472     //
7473     // map(to: s.p[:22])
7474     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7475     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7476     // &(s.p), &(s.p[0]), 22*sizeof(double),
7477     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7478     // (*) alloc space for struct members, only this is a target parameter
7479     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7480     //      optimizes this entry out, same in the examples below)
7481     // (***) map the pointee (map: to)
7482     //
7483     // map(s.ps)
7484     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7485     //
7486     // map(from: s.ps->s.i)
7487     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7488     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7489     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7490     //
7491     // map(to: s.ps->ps)
7492     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7493     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7494     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7495     //
7496     // map(s.ps->ps->ps)
7497     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7498     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7499     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7500     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7501     //
7502     // map(to: s.ps->ps->s.f[:22])
7503     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7504     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7505     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7506     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7507     //
7508     // map(ps)
7509     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7510     //
7511     // map(ps->i)
7512     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7513     //
7514     // map(ps->s.f)
7515     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7516     //
7517     // map(from: ps->p)
7518     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7519     //
7520     // map(to: ps->p[:22])
7521     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7522     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7523     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7524     //
7525     // map(ps->ps)
7526     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7527     //
7528     // map(from: ps->ps->s.i)
7529     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7530     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7531     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7532     //
7533     // map(from: ps->ps->ps)
7534     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7535     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7536     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7537     //
7538     // map(ps->ps->ps->ps)
7539     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7540     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7541     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7542     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7543     //
7544     // map(to: ps->ps->ps->s.f[:22])
7545     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7546     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7547     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7548     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7549     //
7550     // map(to: s.f[:22]) map(from: s.p[:33])
7551     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7553     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7554     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7555     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7556     // (*) allocate contiguous space needed to fit all mapped members even if
7557     //     we allocate space for members not mapped (in this example,
7558     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7559     //     them as well because they fall between &s.f[0] and &s.p)
7560     //
7561     // map(from: s.f[:22]) map(to: ps->p[:33])
7562     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7563     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7564     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7565     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7566     // (*) the struct this entry pertains to is the 2nd element in the list of
7567     //     arguments, hence MEMBER_OF(2)
7568     //
7569     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7570     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7571     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7572     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7573     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7574     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7575     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7576     // (*) the struct this entry pertains to is the 4th element in the list
7577     //     of arguments, hence MEMBER_OF(4)
7578 
7579     // Track if the map information being generated is the first for a capture.
7580     bool IsCaptureFirstInfo = IsFirstComponentList;
7581     // When the variable is on a declare target link or in a to clause with
7582     // unified memory, a reference is needed to hold the host/device address
7583     // of the variable.
7584     bool RequiresReference = false;
7585 
7586     // Scan the components from the base to the complete expression.
7587     auto CI = Components.rbegin();
7588     auto CE = Components.rend();
7589     auto I = CI;
7590 
7591     // Track if the map information being generated is the first for a list of
7592     // components.
7593     bool IsExpressionFirstInfo = true;
7594     bool FirstPointerInComplexData = false;
7595     Address BP = Address::invalid();
7596     const Expr *AssocExpr = I->getAssociatedExpression();
7597     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7598     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7599     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7600 
7601     if (isa<MemberExpr>(AssocExpr)) {
7602       // The base is the 'this' pointer. The content of the pointer is going
7603       // to be the base of the field being mapped.
7604       BP = CGF.LoadCXXThisAddress();
7605     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7606                (OASE &&
7607                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7608       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7609     } else if (OAShE &&
7610                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7611       BP = Address(
7612           CGF.EmitScalarExpr(OAShE->getBase()),
7613           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7614     } else {
7615       // The base is the reference to the variable.
7616       // BP = &Var.
7617       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7618       if (const auto *VD =
7619               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7620         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7621                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7622           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7623               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7624                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7625             RequiresReference = true;
7626             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7627           }
7628         }
7629       }
7630 
7631       // If the variable is a pointer and is being dereferenced (i.e. is not
7632       // the last component), the base has to be the pointer itself, not its
7633       // reference. References are ignored for mapping purposes.
7634       QualType Ty =
7635           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7636       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7637         // No need to generate individual map information for the pointer, it
7638         // can be associated with the combined storage if shared memory mode is
7639         // active or the base declaration is not global variable.
7640         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7641         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7642             !VD || VD->hasLocalStorage())
7643           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7644         else
7645           FirstPointerInComplexData = true;
7646         ++I;
7647       }
7648     }
7649 
7650     // Track whether a component of the list should be marked as MEMBER_OF some
7651     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7652     // in a component list should be marked as MEMBER_OF, all subsequent entries
7653     // do not belong to the base struct. E.g.
7654     // struct S2 s;
7655     // s.ps->ps->ps->f[:]
7656     //   (1) (2) (3) (4)
7657     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7658     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7659     // is the pointee of ps(2) which is not member of struct s, so it should not
7660     // be marked as such (it is still PTR_AND_OBJ).
7661     // The variable is initialized to false so that PTR_AND_OBJ entries which
7662     // are not struct members are not considered (e.g. array of pointers to
7663     // data).
7664     bool ShouldBeMemberOf = false;
7665 
7666     // Variable keeping track of whether or not we have encountered a component
7667     // in the component list which is a member expression. Useful when we have a
7668     // pointer or a final array section, in which case it is the previous
7669     // component in the list which tells us whether we have a member expression.
7670     // E.g. X.f[:]
7671     // While processing the final array section "[:]" it is "f" which tells us
7672     // whether we are dealing with a member of a declared struct.
7673     const MemberExpr *EncounteredME = nullptr;
7674 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7677     uint64_t DimSize = 1;
7678 
7679     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7680 
7681     for (; I != CE; ++I) {
7682       // If the current component is member of a struct (parent struct) mark it.
7683       if (!EncounteredME) {
7684         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7685         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7686         // as MEMBER_OF the parent struct.
7687         if (EncounteredME) {
7688           ShouldBeMemberOf = true;
7689           // Do not emit as complex pointer if this is actually not array-like
7690           // expression.
7691           if (FirstPointerInComplexData) {
7692             QualType Ty = std::prev(I)
7693                               ->getAssociatedDeclaration()
7694                               ->getType()
7695                               .getNonReferenceType();
7696             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7697             FirstPointerInComplexData = false;
7698           }
7699         }
7700       }
7701 
7702       auto Next = std::next(I);
7703 
7704       // We need to generate the addresses and sizes if this is the last
7705       // component, if the component is a pointer or if it is an array section
7706       // whose length can't be proved to be one. If this is a pointer, it
7707       // becomes the base address for the following components.
7708 
7709       // A final array section, is one whose length can't be proved to be one.
7710       // If the map item is non-contiguous then we don't treat any array section
7711       // as final array section.
7712       bool IsFinalArraySection =
7713           !IsNonContiguous &&
7714           isFinalArraySectionExpression(I->getAssociatedExpression());
7715 
7716       // If we have a declaration for the mapping use that, otherwise use
7717       // the base declaration of the map clause.
7718       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7719                                      ? I->getAssociatedDeclaration()
7720                                      : BaseDecl;
7721 
7722       // Get information on whether the element is a pointer. Have to do a
7723       // special treatment for array sections given that they are built-in
7724       // types.
7725       const auto *OASE =
7726           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7727       const auto *OAShE =
7728           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7729       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7730       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7731       bool IsPointer =
7732           OAShE ||
7733           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7734                        .getCanonicalType()
7735                        ->isAnyPointerType()) ||
7736           I->getAssociatedExpression()->getType()->isAnyPointerType();
7737       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7738 
7739       if (OASE)
7740         ++DimSize;
7741 
7742       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7743         // If this is not the last component, we expect the pointer to be
7744         // associated with an array expression or member expression.
7745         assert((Next == CE ||
7746                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7747                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7748                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7749                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7750                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7751                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7752                "Unexpected expression");
7753 
7754         Address LB = Address::invalid();
7755         if (OAShE) {
7756           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7757                        CGF.getContext().getTypeAlignInChars(
7758                            OAShE->getBase()->getType()));
7759         } else {
7760           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7761                    .getAddress(CGF);
7762         }
7763 
7764         // If this component is a pointer inside the base struct then we don't
7765         // need to create any entry for it - it will be combined with the object
7766         // it is pointing to into a single PTR_AND_OBJ entry.
7767         bool IsMemberPointerOrAddr =
7768             (IsPointer || ForDeviceAddr) && EncounteredME &&
7769             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7770              EncounteredME);
7771         if (!OverlappedElements.empty() && Next == CE) {
7772           // Handle base element with the info for overlapped elements.
7773           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7774           assert(!IsPointer &&
7775                  "Unexpected base element with the pointer type.");
7776           // Mark the whole struct as the struct that requires allocation on the
7777           // device.
7778           PartialStruct.LowestElem = {0, LB};
7779           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7780               I->getAssociatedExpression()->getType());
7781           Address HB = CGF.Builder.CreateConstGEP(
7782               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7783                                                               CGF.VoidPtrTy),
7784               TypeSize.getQuantity() - 1);
7785           PartialStruct.HighestElem = {
7786               std::numeric_limits<decltype(
7787                   PartialStruct.HighestElem.first)>::max(),
7788               HB};
7789           PartialStruct.Base = BP;
7790           PartialStruct.LB = LB;
7791           assert(
7792               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7793               "Overlapped elements must be used only once for the variable.");
7794           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7795           // Emit data for non-overlapped data.
7796           OpenMPOffloadMappingFlags Flags =
7797               OMP_MAP_MEMBER_OF |
7798               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7799                              /*AddPtrFlag=*/false,
7800                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7801           llvm::Value *Size = nullptr;
7802           // Do bitcopy of all non-overlapped structure elements.
7803           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7804                    Component : OverlappedElements) {
7805             Address ComponentLB = Address::invalid();
7806             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7807                  Component) {
7808               if (MC.getAssociatedDeclaration()) {
7809                 ComponentLB =
7810                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7811                         .getAddress(CGF);
7812                 Size = CGF.Builder.CreatePtrDiff(
7813                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7814                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7815                 break;
7816               }
7817             }
7818             assert(Size && "Failed to determine structure size");
7819             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7820             CombinedInfo.BasePointers.push_back(BP.getPointer());
7821             CombinedInfo.Pointers.push_back(LB.getPointer());
7822             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7823                 Size, CGF.Int64Ty, /*isSigned=*/true));
7824             CombinedInfo.Types.push_back(Flags);
7825             CombinedInfo.Mappers.push_back(nullptr);
7826             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7827                                                                       : 1);
7828             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7829           }
7830           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7831           CombinedInfo.BasePointers.push_back(BP.getPointer());
7832           CombinedInfo.Pointers.push_back(LB.getPointer());
7833           Size = CGF.Builder.CreatePtrDiff(
7834               CGF.EmitCastToVoidPtr(
7835                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7836               CGF.EmitCastToVoidPtr(LB.getPointer()));
7837           CombinedInfo.Sizes.push_back(
7838               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7839           CombinedInfo.Types.push_back(Flags);
7840           CombinedInfo.Mappers.push_back(nullptr);
7841           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7842                                                                     : 1);
7843           break;
7844         }
7845         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7846         if (!IsMemberPointerOrAddr ||
7847             (Next == CE && MapType != OMPC_MAP_unknown)) {
7848           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7849           CombinedInfo.BasePointers.push_back(BP.getPointer());
7850           CombinedInfo.Pointers.push_back(LB.getPointer());
7851           CombinedInfo.Sizes.push_back(
7852               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7853           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7854                                                                     : 1);
7855 
7856           // If Mapper is valid, the last component inherits the mapper.
7857           bool HasMapper = Mapper && Next == CE;
7858           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7859 
7860           // We need to add a pointer flag for each map that comes from the
7861           // same expression except for the first one. We also need to signal
7862           // this map is the first one that relates with the current capture
7863           // (there is a set of entries for each capture).
7864           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7865               MapType, MapModifiers, MotionModifiers, IsImplicit,
7866               !IsExpressionFirstInfo || RequiresReference ||
7867                   FirstPointerInComplexData,
7868               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7869 
7870           if (!IsExpressionFirstInfo) {
7871             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7872             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7873             if (IsPointer)
7874               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7875                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7876 
7877             if (ShouldBeMemberOf) {
7878               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7879               // should be later updated with the correct value of MEMBER_OF.
7880               Flags |= OMP_MAP_MEMBER_OF;
7881               // From now on, all subsequent PTR_AND_OBJ entries should not be
7882               // marked as MEMBER_OF.
7883               ShouldBeMemberOf = false;
7884             }
7885           }
7886 
7887           CombinedInfo.Types.push_back(Flags);
7888         }
7889 
7890         // If we have encountered a member expression so far, keep track of the
7891         // mapped member. If the parent is "*this", then the value declaration
7892         // is nullptr.
7893         if (EncounteredME) {
7894           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7895           unsigned FieldIndex = FD->getFieldIndex();
7896 
7897           // Update info about the lowest and highest elements for this struct
7898           if (!PartialStruct.Base.isValid()) {
7899             PartialStruct.LowestElem = {FieldIndex, LB};
7900             if (IsFinalArraySection) {
7901               Address HB =
7902                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7903                       .getAddress(CGF);
7904               PartialStruct.HighestElem = {FieldIndex, HB};
7905             } else {
7906               PartialStruct.HighestElem = {FieldIndex, LB};
7907             }
7908             PartialStruct.Base = BP;
7909             PartialStruct.LB = BP;
7910           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7911             PartialStruct.LowestElem = {FieldIndex, LB};
7912           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7913             PartialStruct.HighestElem = {FieldIndex, LB};
7914           }
7915         }
7916 
7917         // Need to emit combined struct for array sections.
7918         if (IsFinalArraySection || IsNonContiguous)
7919           PartialStruct.IsArraySection = true;
7920 
7921         // If we have a final array section, we are done with this expression.
7922         if (IsFinalArraySection)
7923           break;
7924 
7925         // The pointer becomes the base for the next element.
7926         if (Next != CE)
7927           BP = LB;
7928 
7929         IsExpressionFirstInfo = false;
7930         IsCaptureFirstInfo = false;
7931         FirstPointerInComplexData = false;
7932       } else if (FirstPointerInComplexData) {
7933         QualType Ty = Components.rbegin()
7934                           ->getAssociatedDeclaration()
7935                           ->getType()
7936                           .getNonReferenceType();
7937         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7938         FirstPointerInComplexData = false;
7939       }
7940     }
7941     // If ran into the whole component - allocate the space for the whole
7942     // record.
7943     if (!EncounteredME)
7944       PartialStruct.HasCompleteRecord = true;
7945 
7946     if (!IsNonContiguous)
7947       return;
7948 
7949     const ASTContext &Context = CGF.getContext();
7950 
7951     // For supporting stride in array section, we need to initialize the first
7952     // dimension size as 1, first offset as 0, and first count as 1
7953     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7954     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7955     MapValuesArrayTy CurStrides;
7956     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7957     uint64_t ElementTypeSize;
7958 
    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7962     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7963          Components) {
7964       const Expr *AssocExpr = Component.getAssociatedExpression();
7965       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7966 
7967       if (!OASE)
7968         continue;
7969 
7970       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7971       auto *CAT = Context.getAsConstantArrayType(Ty);
7972       auto *VAT = Context.getAsVariableArrayType(Ty);
7973 
7974       // We need all the dimension size except for the last dimension.
7975       assert((VAT || CAT || &Component == &*Components.begin()) &&
7976              "Should be either ConstantArray or VariableArray if not the "
7977              "first Component");
7978 
7979       // Get element size if CurStrides is empty.
7980       if (CurStrides.empty()) {
7981         const Type *ElementType = nullptr;
7982         if (CAT)
7983           ElementType = CAT->getElementType().getTypePtr();
7984         else if (VAT)
7985           ElementType = VAT->getElementType().getTypePtr();
7986         else
7987           assert(&Component == &*Components.begin() &&
7988                  "Only expect pointer (non CAT or VAT) when this is the "
7989                  "first Component");
7990         // If ElementType is null, then it means the base is a pointer
7991         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7992         // for next iteration.
7993         if (ElementType) {
7994           // For the case that having pointer as base, we need to remove one
7995           // level of indirection.
7996           if (&Component != &*Components.begin())
7997             ElementType = ElementType->getPointeeOrArrayElementType();
7998           ElementTypeSize =
7999               Context.getTypeSizeInChars(ElementType).getQuantity();
8000           CurStrides.push_back(
8001               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8002         }
8003       }
8004       // Get dimension value except for the last dimension since we don't need
8005       // it.
8006       if (DimSizes.size() < Components.size() - 1) {
8007         if (CAT)
8008           DimSizes.push_back(llvm::ConstantInt::get(
8009               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8010         else if (VAT)
8011           DimSizes.push_back(CGF.Builder.CreateIntCast(
8012               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8013               /*IsSigned=*/false));
8014       }
8015     }
8016 
    // Skip the dummy dimension since we already have its information.
8018     auto DI = DimSizes.begin() + 1;
8019     // Product of dimension.
8020     llvm::Value *DimProd =
8021         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8022 
8023     // Collect info for non-contiguous. Notice that offset, count, and stride
8024     // are only meaningful for array-section, so we insert a null for anything
8025     // other than array-section.
8026     // Also, the size of offset, count, and stride are not the same as
8027     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8028     // count, and stride are the same as the number of non-contiguous
8029     // declaration in target update to/from clause.
8030     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8031          Components) {
8032       const Expr *AssocExpr = Component.getAssociatedExpression();
8033 
8034       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8035         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8036             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8037             /*isSigned=*/false);
8038         CurOffsets.push_back(Offset);
8039         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8040         CurStrides.push_back(CurStrides.back());
8041         continue;
8042       }
8043 
8044       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8045 
8046       if (!OASE)
8047         continue;
8048 
8049       // Offset
8050       const Expr *OffsetExpr = OASE->getLowerBound();
8051       llvm::Value *Offset = nullptr;
8052       if (!OffsetExpr) {
8053         // If offset is absent, then we just set it to zero.
8054         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8055       } else {
8056         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8057                                            CGF.Int64Ty,
8058                                            /*isSigned=*/false);
8059       }
8060       CurOffsets.push_back(Offset);
8061 
8062       // Count
8063       const Expr *CountExpr = OASE->getLength();
8064       llvm::Value *Count = nullptr;
8065       if (!CountExpr) {
8066         // In Clang, once a high dimension is an array section, we construct all
8067         // the lower dimension as array section, however, for case like
8068         // arr[0:2][2], Clang construct the inner dimension as an array section
8069         // but it actually is not in an array section form according to spec.
8070         if (!OASE->getColonLocFirst().isValid() &&
8071             !OASE->getColonLocSecond().isValid()) {
8072           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8073         } else {
8074           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8075           // When the length is absent it defaults to ⌈(size −
8076           // lower-bound)/stride⌉, where size is the size of the array
8077           // dimension.
8078           const Expr *StrideExpr = OASE->getStride();
8079           llvm::Value *Stride =
8080               StrideExpr
8081                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8082                                               CGF.Int64Ty, /*isSigned=*/false)
8083                   : nullptr;
8084           if (Stride)
8085             Count = CGF.Builder.CreateUDiv(
8086                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8087           else
8088             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8089         }
8090       } else {
8091         Count = CGF.EmitScalarExpr(CountExpr);
8092       }
8093       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8094       CurCounts.push_back(Count);
8095 
8096       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8097       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8098       //              Offset      Count     Stride
8099       //    D0          0           1         4    (int)    <- dummy dimension
8100       //    D1          0           2         8    (2 * (1) * 4)
8101       //    D2          1           2         20   (1 * (1 * 5) * 4)
8102       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8103       const Expr *StrideExpr = OASE->getStride();
8104       llvm::Value *Stride =
8105           StrideExpr
8106               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8107                                           CGF.Int64Ty, /*isSigned=*/false)
8108               : nullptr;
8109       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8110       if (Stride)
8111         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8112       else
8113         CurStrides.push_back(DimProd);
8114       if (DI != DimSizes.end())
8115         ++DI;
8116     }
8117 
8118     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8119     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8120     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8121   }
8122 
8123   /// Return the adjusted map modifiers if the declaration a capture refers to
8124   /// appears in a first-private clause. This is expected to be used only with
8125   /// directives that start with 'target'.
8126   MappableExprsHandler::OpenMPOffloadMappingFlags
8127   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8128     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8129 
8130     // A first private variable captured by reference will use only the
8131     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8132     // declaration is known as first-private in this handler.
8133     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8134       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8135           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8136         return MappableExprsHandler::OMP_MAP_ALWAYS |
8137                MappableExprsHandler::OMP_MAP_TO;
8138       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8139         return MappableExprsHandler::OMP_MAP_TO |
8140                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8141       return MappableExprsHandler::OMP_MAP_PRIVATE |
8142              MappableExprsHandler::OMP_MAP_TO;
8143     }
8144     return MappableExprsHandler::OMP_MAP_TO |
8145            MappableExprsHandler::OMP_MAP_FROM;
8146   }
8147 
8148   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8149     // Rotate by getFlagMemberOffset() bits.
8150     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8151                                                   << getFlagMemberOffset());
8152   }
8153 
8154   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8155                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8156     // If the entry is PTR_AND_OBJ but has not been marked with the special
8157     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8158     // marked as MEMBER_OF.
8159     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8160         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8161       return;
8162 
8163     // Reset the placeholder value to prepare the flag for the assignment of the
8164     // proper MEMBER_OF value.
8165     Flags &= ~OMP_MAP_MEMBER_OF;
8166     Flags |= MemberOfFlag;
8167   }
8168 
8169   void getPlainLayout(const CXXRecordDecl *RD,
8170                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8171                       bool AsBase) const {
8172     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8173 
8174     llvm::StructType *St =
8175         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8176 
8177     unsigned NumElements = St->getNumElements();
8178     llvm::SmallVector<
8179         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8180         RecordLayout(NumElements);
8181 
8182     // Fill bases.
8183     for (const auto &I : RD->bases()) {
8184       if (I.isVirtual())
8185         continue;
8186       const auto *Base = I.getType()->getAsCXXRecordDecl();
8187       // Ignore empty bases.
8188       if (Base->isEmpty() || CGF.getContext()
8189                                  .getASTRecordLayout(Base)
8190                                  .getNonVirtualSize()
8191                                  .isZero())
8192         continue;
8193 
8194       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8195       RecordLayout[FieldIndex] = Base;
8196     }
8197     // Fill in virtual bases.
8198     for (const auto &I : RD->vbases()) {
8199       const auto *Base = I.getType()->getAsCXXRecordDecl();
8200       // Ignore empty bases.
8201       if (Base->isEmpty())
8202         continue;
8203       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8204       if (RecordLayout[FieldIndex])
8205         continue;
8206       RecordLayout[FieldIndex] = Base;
8207     }
8208     // Fill in all the fields.
8209     assert(!RD->isUnion() && "Unexpected union.");
8210     for (const auto *Field : RD->fields()) {
8211       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8212       // will fill in later.)
8213       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8214         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8215         RecordLayout[FieldIndex] = Field;
8216       }
8217     }
8218     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8219              &Data : RecordLayout) {
8220       if (Data.isNull())
8221         continue;
8222       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8223         getPlainLayout(Base, Layout, /*AsBase=*/true);
8224       else
8225         Layout.push_back(Data.get<const FieldDecl *>());
8226     }
8227   }
8228 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// \param Clauses Clauses to scan. Only map, to, from, use_device_ptr and
  /// use_device_addr clauses are inspected here; others are ignored.
  /// \param CombinedInfo Receives the generated entries.
  /// \param SkipVarSet Declarations whose component lists are not recorded.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Each declaration owns one bucket per MapKind (Total buckets). The final
    // loop below walks the buckets in enumerator order, so lists are processed
    // as Present, then Allocs, then Other.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    // Lists for declarations in SkipVarSet are dropped; otherwise the list is
    // appended to the bucket selected by Kind, creating the per-declaration
    // entry on first use.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect 'map' clauses. A clause with the 'present' modifier goes to the
    // Present bucket, a plain 'alloc' map to Allocs, everything else to Other.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      // EI walks the clause's variable references in step with its component
      // lists.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The variable reference is only forwarded when the clause has a valid
        // map location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect 'to' clauses; they are recorded with map type 'to' and carry
    // motion modifiers instead of map modifiers.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect 'from' clauses; same as 'to' but recorded with map type 'from'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Entries for non-member use_device_ptr items are gathered separately and
    // appended to CombinedInfo only at the very end of this function.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              // PrevCI is the component just before the last one in the
              // matching list (rend() if the list has a single component).
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Non-member pointer: use the loaded pointer value as both base and
          // section pointer of a zero-size RETURN_PARAM entry.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    // Processed ensures each declaration is handled at most once across all
    // use_device_addr clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Non-member item: unlike use_device_ptr above, the zero-size
          // RETURN_PARAM entry is appended to CombinedInfo right away, and the
          // address (not a loaded value) is used when IE is a glvalue.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Generate the entries declaration by declaration, in the insertion order
    // of Info, accumulating each declaration's entries in CurInfo before
    // appending them to CombinedInfo.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          // Only the first entry generated for this component list is tagged.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      // Deferred entries are only ever registered under the null key above, so
      // this lookup can only succeed for the null ('this') declaration.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base is the address of the member and the
            // section pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      // The combined entry is pushed to CombinedInfo before CurInfo is
      // appended, so it precedes the member entries it covers.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8582 
8583 public:
8584   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8585       : CurDir(&Dir), CGF(CGF) {
8586     // Extract firstprivate clause information.
8587     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8588       for (const auto *D : C->varlists())
8589         FirstPrivateDecls.try_emplace(
8590             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8591     // Extract implicit firstprivates from uses_allocators clauses.
8592     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8593       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8594         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8595         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8596           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8597                                         /*Implicit=*/true);
8598         else if (const auto *VD = dyn_cast<VarDecl>(
8599                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8600                          ->getDecl()))
8601           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8602       }
8603     }
8604     // Extract device pointer clause information.
8605     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8606       for (auto L : C->component_lists())
8607         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8608   }
8609 
  /// Constructor for the declare mapper directive. It only records the
  /// directive and the code generation function; unlike the executable
  /// directive constructor, no clause information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8613 
8614   /// Generate code for the combined entry if we have a partially mapped struct
8615   /// and take care of the mapping flags of the arguments corresponding to
8616   /// individual struct members.
8617   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8618                          MapFlagsArrayTy &CurTypes,
8619                          const StructRangeInfoTy &PartialStruct,
8620                          const ValueDecl *VD = nullptr,
8621                          bool NotTargetParams = true) const {
8622     if (CurTypes.size() == 1 &&
8623         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8624         !PartialStruct.IsArraySection)
8625       return;
8626     Address LBAddr = PartialStruct.LowestElem.second;
8627     Address HBAddr = PartialStruct.HighestElem.second;
8628     if (PartialStruct.HasCompleteRecord) {
8629       LBAddr = PartialStruct.LB;
8630       HBAddr = PartialStruct.LB;
8631     }
8632     CombinedInfo.Exprs.push_back(VD);
8633     // Base is the base of the struct
8634     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8635     // Pointer is the address of the lowest element
8636     llvm::Value *LB = LBAddr.getPointer();
8637     CombinedInfo.Pointers.push_back(LB);
8638     // There should not be a mapper for a combined entry.
8639     CombinedInfo.Mappers.push_back(nullptr);
8640     // Size is (addr of {highest+1} element) - (addr of lowest element)
8641     llvm::Value *HB = HBAddr.getPointer();
8642     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8643     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8644     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8645     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8646     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8647                                                   /*isSigned=*/false);
8648     CombinedInfo.Sizes.push_back(Size);
8649     // Map type is always TARGET_PARAM, if generate info for captures.
8650     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8651                                                  : OMP_MAP_TARGET_PARAM);
8652     // If any element has the present modifier, then make sure the runtime
8653     // doesn't attempt to allocate the struct.
8654     if (CurTypes.end() !=
8655         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8656           return Type & OMP_MAP_PRESENT;
8657         }))
8658       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8659     // Remove TARGET_PARAM flag from the first element
8660     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8661 
8662     // All other current entries will be MEMBER_OF the combined entry
8663     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8664     // 0xFFFF in the MEMBER_OF field).
8665     OpenMPOffloadMappingFlags MemberOfFlag =
8666         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8667     for (auto &M : CurTypes)
8668       setCorrectMemberOfFlag(M, MemberOfFlag);
8669   }
8670 
8671   /// Generate all the base pointers, section pointers, sizes, map types, and
8672   /// mappers for the extracted mappable expressions (all included in \a
8673   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8674   /// pair of the relevant declaration and index where it occurs is appended to
8675   /// the device pointers info array.
8676   void generateAllInfo(
8677       MapCombinedInfoTy &CombinedInfo,
8678       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8679           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8680     assert(CurDir.is<const OMPExecutableDirective *>() &&
8681            "Expect a executable directive");
8682     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8683     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8684   }
8685 
8686   /// Generate all the base pointers, section pointers, sizes, map types, and
8687   /// mappers for the extracted map clauses of user-defined mapper (all included
8688   /// in \a CombinedInfo).
8689   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8690     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8691            "Expect a declare mapper directive");
8692     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8693     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8694   }
8695 
8696   /// Emit capture info for lambdas for variables captured by reference.
8697   void generateInfoForLambdaCaptures(
8698       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8699       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8700     const auto *RD = VD->getType()
8701                          .getCanonicalType()
8702                          .getNonReferenceType()
8703                          ->getAsCXXRecordDecl();
8704     if (!RD || !RD->isLambda())
8705       return;
8706     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8707     LValue VDLVal = CGF.MakeAddrLValue(
8708         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8709     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8710     FieldDecl *ThisCapture = nullptr;
8711     RD->getCaptureFields(Captures, ThisCapture);
8712     if (ThisCapture) {
8713       LValue ThisLVal =
8714           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8715       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8716       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8717                                  VDLVal.getPointer(CGF));
8718       CombinedInfo.Exprs.push_back(VD);
8719       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8720       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8721       CombinedInfo.Sizes.push_back(
8722           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8723                                     CGF.Int64Ty, /*isSigned=*/true));
8724       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8725                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8726       CombinedInfo.Mappers.push_back(nullptr);
8727     }
8728     for (const LambdaCapture &LC : RD->captures()) {
8729       if (!LC.capturesVariable())
8730         continue;
8731       const VarDecl *VD = LC.getCapturedVar();
8732       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8733         continue;
8734       auto It = Captures.find(VD);
8735       assert(It != Captures.end() && "Found lambda capture without field.");
8736       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8737       if (LC.getCaptureKind() == LCK_ByRef) {
8738         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8739         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8740                                    VDLVal.getPointer(CGF));
8741         CombinedInfo.Exprs.push_back(VD);
8742         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8743         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8744         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8745             CGF.getTypeSize(
8746                 VD->getType().getCanonicalType().getNonReferenceType()),
8747             CGF.Int64Ty, /*isSigned=*/true));
8748       } else {
8749         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8750         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8751                                    VDLVal.getPointer(CGF));
8752         CombinedInfo.Exprs.push_back(VD);
8753         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8754         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8755         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8756       }
8757       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8758                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8759       CombinedInfo.Mappers.push_back(nullptr);
8760     }
8761   }
8762 
8763   /// Set correct indices for lambdas captures.
8764   void adjustMemberOfForLambdaCaptures(
8765       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8766       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8767       MapFlagsArrayTy &Types) const {
8768     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8769       // Set correct member_of idx for all implicit lambda captures.
8770       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8771                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8772         continue;
8773       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8774       assert(BasePtr && "Unable to find base lambda address.");
8775       int TgtIdx = -1;
8776       for (unsigned J = I; J > 0; --J) {
8777         unsigned Idx = J - 1;
8778         if (Pointers[Idx] != BasePtr)
8779           continue;
8780         TgtIdx = Idx;
8781         break;
8782       }
8783       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8784       // All other current entries will be MEMBER_OF the combined entry
8785       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8786       // 0xFFFF in the MEMBER_OF field).
8787       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8788       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8789     }
8790   }
8791 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// The function proceeds in these steps:
  ///  1. If the captured declaration is present in DevPointersMap, a single
  ///     entry that passes \a Arg is emitted and the function returns.
  ///  2. Otherwise every component list that the map clauses of the current
  ///     executable directive associate with the declaration is collected
  ///     and stably sorted.
  ///  3. Pairs of component lists where one is a prefix of the other are
  ///     recorded as overlapping, and the overlapped lists of each base are
  ///     sorted by field position.
  ///  4. generateInfoForComponentList is invoked first for the bases that
  ///     have overlapped lists and then for the remaining lists.
  ///
  /// \param Cap The capture; must not capture a variable array type
  ///        (asserted).
  /// \param Arg Value emitted as base pointer and pointer in step 1.
  /// \param CombinedInfo Receives the generated entries.
  /// \param PartialStruct Forwarded to every generateInfoForComponentList
  ///        call.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. VD is null when the capture is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Tuple layout: <components, map type, map modifiers, is implicit,
    // mapper, variable reference expression>.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists of all map clauses that refer to VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI starts at the clause's first variable reference and is advanced
      // once per component list visited for VD.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable sort of the collected lists. As written, LHS is ordered before
    // RHS when LHS has the 'present' modifier and RHS does not, or when RHS
    // has map type 'alloc' and LHS does not.
    // NOTE(review): HasPresent/HasPresentR are taken from LHS/RHS, while
    // HasAllocs/HasAllocsR are taken from RHS/LHS. Confirm that this swap is
    // intended and that the predicate is a valid ordering for stable_sort.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Keys point into DeclComponentLists; values are the component lists that
    // overlap with the keyed (shorter) list.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L only against the lists that follow it.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components and Components1 are read below; the remaining
        // locals are merely overwritten with L1's values.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from their last component towards the first while
        // the components agree in statement class and declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It is the first unmatched component of the longer list.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  .getNonReferenceType()
                  ->isPointerType())
            continue;
          // The exhausted (shorter) list is the base; the longer one is
          // recorded as overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout receives the fields of the underlying record type and is used
    // by the comparator below for fields that have different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer and array levels until the type no longer changes.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists, starting from their last
            // components.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The first differing components must both be fields: order them
            // by field index when they share a parent, otherwise by which of
            // them appears first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // IsFirstComponentList is true only for the first list visited across
    // both loops below.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists that are keys of OverlappedData were handled by the loop above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared on every iteration, including those that emit nothing.
      IsFirstComponentList = false;
    }
  }
9024 
9025   /// Generate the default map information for a given capture \a CI,
9026   /// record field declaration \a RI and captured value \a CV.
9027   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9028                               const FieldDecl &RI, llvm::Value *CV,
9029                               MapCombinedInfoTy &CombinedInfo) const {
9030     bool IsImplicit = true;
9031     // Do the default mapping.
9032     if (CI.capturesThis()) {
9033       CombinedInfo.Exprs.push_back(nullptr);
9034       CombinedInfo.BasePointers.push_back(CV);
9035       CombinedInfo.Pointers.push_back(CV);
9036       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9037       CombinedInfo.Sizes.push_back(
9038           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9039                                     CGF.Int64Ty, /*isSigned=*/true));
9040       // Default map type.
9041       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9042     } else if (CI.capturesVariableByCopy()) {
9043       const VarDecl *VD = CI.getCapturedVar();
9044       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9045       CombinedInfo.BasePointers.push_back(CV);
9046       CombinedInfo.Pointers.push_back(CV);
9047       if (!RI.getType()->isAnyPointerType()) {
9048         // We have to signal to the runtime captures passed by value that are
9049         // not pointers.
9050         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9051         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9052             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9053       } else {
9054         // Pointers are implicitly mapped with a zero size and no flags
9055         // (other than first map that is added for all implicit maps).
9056         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9057         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9058       }
9059       auto I = FirstPrivateDecls.find(VD);
9060       if (I != FirstPrivateDecls.end())
9061         IsImplicit = I->getSecond();
9062     } else {
9063       assert(CI.capturesVariable() && "Expected captured reference.");
9064       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9065       QualType ElementType = PtrTy->getPointeeType();
9066       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9067           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9068       // The default map type for a scalar/complex type is 'to' because by
9069       // default the value doesn't have to be retrieved. For an aggregate
9070       // type, the default is 'tofrom'.
9071       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9072       const VarDecl *VD = CI.getCapturedVar();
9073       auto I = FirstPrivateDecls.find(VD);
9074       if (I != FirstPrivateDecls.end() &&
9075           VD->getType().isConstant(CGF.getContext())) {
9076         llvm::Constant *Addr =
9077             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9078         // Copy the value of the original variable to the new global copy.
9079         CGF.Builder.CreateMemCpy(
9080             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9081             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9082             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9083         // Use new global variable as the base pointers.
9084         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9085         CombinedInfo.BasePointers.push_back(Addr);
9086         CombinedInfo.Pointers.push_back(Addr);
9087       } else {
9088         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9089         CombinedInfo.BasePointers.push_back(CV);
9090         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9091           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9092               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9093               AlignmentSource::Decl));
9094           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9095         } else {
9096           CombinedInfo.Pointers.push_back(CV);
9097         }
9098       }
9099       if (I != FirstPrivateDecls.end())
9100         IsImplicit = I->getSecond();
9101     }
9102     // Every default map produces a single argument which is a target parameter.
9103     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9104 
9105     // Add flag stating this is an implicit map.
9106     if (IsImplicit)
9107       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9108 
9109     // No user-defined mapper for default mapping.
9110     CombinedInfo.Mappers.push_back(nullptr);
9111   }
9112 };
9113 } // anonymous namespace
9114 
9115 static void emitNonContiguousDescriptor(
9116     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9117     CGOpenMPRuntime::TargetDataInfo &Info) {
9118   CodeGenModule &CGM = CGF.CGM;
9119   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9120       &NonContigInfo = CombinedInfo.NonContigInfo;
9121 
9122   // Build an array of struct descriptor_dim and then assign it to
9123   // offload_args.
9124   //
9125   // struct descriptor_dim {
9126   //  uint64_t offset;
9127   //  uint64_t count;
9128   //  uint64_t stride
9129   // };
9130   ASTContext &C = CGF.getContext();
9131   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9132   RecordDecl *RD;
9133   RD = C.buildImplicitRecord("descriptor_dim");
9134   RD->startDefinition();
9135   addFieldToRecordDecl(C, RD, Int64Ty);
9136   addFieldToRecordDecl(C, RD, Int64Ty);
9137   addFieldToRecordDecl(C, RD, Int64Ty);
9138   RD->completeDefinition();
9139   QualType DimTy = C.getRecordType(RD);
9140 
9141   enum { OffsetFD = 0, CountFD, StrideFD };
9142   // We need two index variable here since the size of "Dims" is the same as the
9143   // size of Components, however, the size of offset, count, and stride is equal
9144   // to the size of base declaration that is non-contiguous.
9145   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9146     // Skip emitting ir if dimension size is 1 since it cannot be
9147     // non-contiguous.
9148     if (NonContigInfo.Dims[I] == 1)
9149       continue;
9150     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9151     QualType ArrayTy =
9152         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9153     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9154     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9155       unsigned RevIdx = EE - II - 1;
9156       LValue DimsLVal = CGF.MakeAddrLValue(
9157           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9158       // Offset
9159       LValue OffsetLVal = CGF.EmitLValueForField(
9160           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9161       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9162       // Count
9163       LValue CountLVal = CGF.EmitLValueForField(
9164           DimsLVal, *std::next(RD->field_begin(), CountFD));
9165       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9166       // Stride
9167       LValue StrideLVal = CGF.EmitLValueForField(
9168           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9169       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9170     }
9171     // args[I] = &dims
9172     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9173         DimsAddr, CGM.Int8PtrTy);
9174     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9175         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9176         Info.PointersArray, 0, I);
9177     Address PAddr(P, CGF.getPointerAlign());
9178     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9179     ++L;
9180   }
9181 }
9182 
9183 /// Emit a string constant containing the names of the values mapped to the
9184 /// offloading runtime library.
9185 llvm::Constant *
9186 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9187                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9188   llvm::Constant *SrcLocStr;
9189   if (!MapExprs.getMapDecl()) {
9190     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9191   } else {
9192     std::string ExprName = "";
9193     if (MapExprs.getMapExpr()) {
9194       PrintingPolicy P(CGF.getContext().getLangOpts());
9195       llvm::raw_string_ostream OS(ExprName);
9196       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9197       OS.flush();
9198     } else {
9199       ExprName = MapExprs.getMapDecl()->getNameAsString();
9200     }
9201 
9202     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9203     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9204     const char *FileName = PLoc.getFilename();
9205     unsigned Line = PLoc.getLine();
9206     unsigned Column = PLoc.getColumn();
9207     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9208                                                 Line, Column);
9209   }
9210 
9211   return SrcLocStr;
9212 }
9213 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// The array information in \p Info is reset first and \p Info.NumberOfPtrs is
/// set to the number of entries in \p CombinedInfo.BasePointers. When that
/// number is non-zero, the following fields of \p Info are populated:
///  - BasePointersArray, PointersArray, MappersArray: stack temporaries that
///    are filled element by element with stores emitted through \p CGF.
///  - SizesArray: a private constant global when every entry of
///    \p CombinedInfo.Sizes is an llvm::Constant, otherwise a stack temporary
///    filled with stores.
///  - MapTypesArray: a private constant global built from
///    \p CombinedInfo.Types.
///  - MapNamesArray: a null pointer constant when no debug info is requested,
///    otherwise a private constant global holding the constants returned by
///    emitMappingInformation for each entry of \p CombinedInfo.Exprs.
///  - MapTypesArrayEnd: only set when \p Info.separateBeginEndCalls() is true
///    and at least one map type has OMP_MAP_PRESENT set.
///  - HasMapper: set to true if any entry of \p CombinedInfo.Mappers is
///    non-null.
///  - CaptureDeviceAddrMap: receives the base-pointer slot address of entries
///    that have a device pointer declaration, if
///    \p Info.requiresDevicePointerInfo() is true.
///
/// \param CGF Function in which the temporaries and stores are emitted.
/// \param CombinedInfo Base pointers, pointers, sizes, map types, mappers,
///        expressions and non-contiguous information to encode.
/// \param Info Receives the emitted arrays (see above).
/// \param OMPBuilder Forwarded to emitMappingInformation.
/// \param IsNonContiguous If true, entries flagged OMP_MAP_NON_CONTIG take
///        their constant size from \p CombinedInfo.NonContigInfo.Dims, and
///        emitNonContiguousDescriptor is called when offsets were collected.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // AST type 'void *[NumberOfPtrs]', shared by the base pointer, pointer and
    // mapper temporaries below.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    // The mappers array is kept as an Address as well because it is indexed
    // with CreateConstArrayGEP in the fill loop below.
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Only the storage is created here; the elements are stored in the fill
      // loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the size slot carries the value of
        // NonContigInfo.Dims[I] instead of Sizes[I].
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    // Mapping is a mutable uint64_t copy of the map types; it is modified and
    // reused further down to build the end-of-region map type array.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: the names array is a null i8* constant.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One constant per mapped expression, produced by
      // emitMappingInformation.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      // Unlike the sizes and map types globals, unnamed_addr is not set on
      // this global.
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      // Strip OMP_MAP_PRESENT in place and remember whether anything changed;
      // the second global is only created if it would differ from the first.
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the stack arrays: one base pointer, one pointer, optionally one
    // size, and one mapper per entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot address to a pointer to BPVal's own type so that BPVal
      // can be stored as is.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address for entries that carry a device pointer
      // declaration.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Same scheme as for the base pointer, applied to the pointers array.
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored here when the sizes array is a stack temporary;
      // each size is converted to a signed 64-bit integer first.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      // Entries without a user-defined mapper store a null void pointer.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The non-contiguous descriptor is only emitted when it was requested,
  // offsets were collected, and there is at least one pointer.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9407 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  ArgumentsOptions() = default;
  /// Intentionally implicit so that a plain bool can be passed where an
  /// ArgumentsOptions is expected.
  ArgumentsOptions(bool IsForEndCall) : ForEndCall{IsForEndCall} {}

  /// If true, the arguments are emitted for the runtime call that ends the
  /// region rather than for the one that begins it.
  bool ForEndCall = false;
};
} // namespace
9416 
9417 /// Emit the arguments to be passed to the runtime library based on the
9418 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9419 /// ForEndCall, emit map types to be passed for the end of the region instead of
9420 /// the beginning.
9421 static void emitOffloadingArraysArgument(
9422     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9423     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9424     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9425     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9426     const ArgumentsOptions &Options = ArgumentsOptions()) {
9427   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9428          "expected region end call to runtime only when end call is separate");
9429   CodeGenModule &CGM = CGF.CGM;
9430   if (Info.NumberOfPtrs) {
9431     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9432         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9433         Info.BasePointersArray,
9434         /*Idx0=*/0, /*Idx1=*/0);
9435     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9436         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9437         Info.PointersArray,
9438         /*Idx0=*/0,
9439         /*Idx1=*/0);
9440     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9441         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9442         /*Idx0=*/0, /*Idx1=*/0);
9443     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9444         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9445         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9446                                                     : Info.MapTypesArray,
9447         /*Idx0=*/0,
9448         /*Idx1=*/0);
9449 
9450     // Only emit the mapper information arrays if debug information is
9451     // requested.
9452     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9453       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9454     else
9455       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9456           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9457           Info.MapNamesArray,
9458           /*Idx0=*/0,
9459           /*Idx1=*/0);
9460     // If there is no user-defined mapper, set the mapper array to nullptr to
9461     // avoid an unnecessary data privatization
9462     if (!Info.HasMapper)
9463       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9464     else
9465       MappersArrayArg =
9466           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9467   } else {
9468     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9469     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9470     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9471     MapTypesArrayArg =
9472         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9473     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9474     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9475   }
9476 }
9477 
9478 /// Check for inner distribute directive.
9479 static const OMPExecutableDirective *
9480 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9481   const auto *CS = D.getInnermostCapturedStmt();
9482   const auto *Body =
9483       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9484   const Stmt *ChildStmt =
9485       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9486 
9487   if (const auto *NestedDir =
9488           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9489     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9490     switch (D.getDirectiveKind()) {
9491     case OMPD_target:
9492       if (isOpenMPDistributeDirective(DKind))
9493         return NestedDir;
9494       if (DKind == OMPD_teams) {
9495         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9496             /*IgnoreCaptured=*/true);
9497         if (!Body)
9498           return nullptr;
9499         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9500         if (const auto *NND =
9501                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9502           DKind = NND->getDirectiveKind();
9503           if (isOpenMPDistributeDirective(DKind))
9504             return NND;
9505         }
9506       }
9507       return nullptr;
9508     case OMPD_target_teams:
9509       if (isOpenMPDistributeDirective(DKind))
9510         return NestedDir;
9511       return nullptr;
9512     case OMPD_target_parallel:
9513     case OMPD_target_simd:
9514     case OMPD_target_parallel_for:
9515     case OMPD_target_parallel_for_simd:
9516       return nullptr;
9517     case OMPD_target_teams_distribute:
9518     case OMPD_target_teams_distribute_simd:
9519     case OMPD_target_teams_distribute_parallel_for:
9520     case OMPD_target_teams_distribute_parallel_for_simd:
9521     case OMPD_parallel:
9522     case OMPD_for:
9523     case OMPD_parallel_for:
9524     case OMPD_parallel_master:
9525     case OMPD_parallel_sections:
9526     case OMPD_for_simd:
9527     case OMPD_parallel_for_simd:
9528     case OMPD_cancel:
9529     case OMPD_cancellation_point:
9530     case OMPD_ordered:
9531     case OMPD_threadprivate:
9532     case OMPD_allocate:
9533     case OMPD_task:
9534     case OMPD_simd:
9535     case OMPD_tile:
9536     case OMPD_sections:
9537     case OMPD_section:
9538     case OMPD_single:
9539     case OMPD_master:
9540     case OMPD_critical:
9541     case OMPD_taskyield:
9542     case OMPD_barrier:
9543     case OMPD_taskwait:
9544     case OMPD_taskgroup:
9545     case OMPD_atomic:
9546     case OMPD_flush:
9547     case OMPD_depobj:
9548     case OMPD_scan:
9549     case OMPD_teams:
9550     case OMPD_target_data:
9551     case OMPD_target_exit_data:
9552     case OMPD_target_enter_data:
9553     case OMPD_distribute:
9554     case OMPD_distribute_simd:
9555     case OMPD_distribute_parallel_for:
9556     case OMPD_distribute_parallel_for_simd:
9557     case OMPD_teams_distribute:
9558     case OMPD_teams_distribute_simd:
9559     case OMPD_teams_distribute_parallel_for:
9560     case OMPD_teams_distribute_parallel_for_simd:
9561     case OMPD_target_update:
9562     case OMPD_declare_simd:
9563     case OMPD_declare_variant:
9564     case OMPD_begin_declare_variant:
9565     case OMPD_end_declare_variant:
9566     case OMPD_declare_target:
9567     case OMPD_end_declare_target:
9568     case OMPD_declare_reduction:
9569     case OMPD_declare_mapper:
9570     case OMPD_taskloop:
9571     case OMPD_taskloop_simd:
9572     case OMPD_master_taskloop:
9573     case OMPD_master_taskloop_simd:
9574     case OMPD_parallel_master_taskloop:
9575     case OMPD_parallel_master_taskloop_simd:
9576     case OMPD_requires:
9577     case OMPD_unknown:
9578     default:
9579       llvm_unreachable("Unexpected directive.");
9580     }
9581   }
9582 
9583   return nullptr;
9584 }
9585 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// Nothing is emitted if a function for \p D is already recorded in UDMMap.
/// Otherwise the new function is recorded in UDMMap and, when \p CGF is
/// non-null, \p D is appended to the FunctionUDMMap entry of \p CGF->CurFn.
///
/// \param D The 'declare mapper' declaration to emit a function for.
/// \param CGF Optional function the mapper is emitted on behalf of; may be
///        null.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The parameters are, in order: handle, base, begin, size, type and name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A dedicated CodeGenFunction is used; \p CGF is not used to emit the body.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): the type passed to the EmitLoadOfScalar calls below is a
  // pointer to the parameter's type (e.g. pointer to Int64Ty for the int64
  // size argument) - confirm this is intended.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  // The division is emitted as exact, i.e. the incoming size is expected to be
  // a multiple of the element size.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the range of elements the loop below walks over.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is passed as the block to continue in when the initiation is
  // skipped.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range skips the loop and the array deletion and goes straight to
  // the function exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // The block holding the branch above; it is the predecessor of the loop body
  // for the first iteration and therefore the first PHI incoming block.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the back-edge will branch from: BodyBB itself if
  // the mapper has no components, otherwise the last "omp.type.end" block.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count by getFlagMemberOffset() so that it can be added
  // to the map type of each component below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name argument is null unless debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    // The statically known map type is offset by the shifted number of
    // pre-existing components; presumably this rebases the MEMBER_OF index -
    // confirm against getFlagMemberOffset().
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    // LeftToFrom holds only the TO/FROM bits of the incoming map type.
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // NOTE(review): no explicit branch from FromBB to EndBB is created here;
    // presumably EmitBlock adds the fall-through branch - confirm.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four outcomes; the tofrom case arrives directly from ToElseBB
    // with the unmodified MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this array shadows the single-element OffloadingArgs declared
    // before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Back-edge value of the loop PHI, coming from the last block emitted for
  // the body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function so the mapper is not emitted again, and
  // associate the declaration with the requesting function, if any.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9861 
9862 /// Emit the array initialization or deletion portion for user-defined mapper
9863 /// code generation. First, it evaluates whether an array section is mapped and
9864 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9865 /// true, and \a MapType indicates to not delete this array, array
9866 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
9868 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9869     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9870     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9871     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9872   StringRef Prefix = IsInit ? ".init" : ".del";
9873 
9874   // Evaluate if this is an array section.
9875   llvm::BasicBlock *BodyBB =
9876       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9877   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9878       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9879   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9880       MapType,
9881       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9882   llvm::Value *DeleteCond;
9883   llvm::Value *Cond;
9884   if (IsInit) {
9885     // base != begin?
9886     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9887         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9888     // IsPtrAndObj?
9889     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9890         MapType,
9891         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9892     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9893     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9894     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9895     DeleteCond = MapperCGF.Builder.CreateIsNull(
9896         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9897   } else {
9898     Cond = IsArray;
9899     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9900         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9901   }
9902   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9903   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9904 
9905   MapperCGF.EmitBlock(BodyBB);
9906   // Get the array size by multiplying element size and element number (i.e., \p
9907   // Size).
9908   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9909       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9910   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9911   // memory allocation/deletion purpose only.
9912   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9913       MapType,
9914       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9915                                    MappableExprsHandler::OMP_MAP_FROM |
9916                                    MappableExprsHandler::OMP_MAP_MEMBER_OF)));
9917   llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9918 
9919   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9920   // data structure.
9921   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9922                                    ArraySize, MapTypeArg, MapNameArg};
9923   MapperCGF.EmitRuntimeCall(
9924       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9925                                             OMPRTL___tgt_push_mapper_component),
9926       OffloadingArgs);
9927 }
9928 
9929 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9930     const OMPDeclareMapperDecl *D) {
9931   auto I = UDMMap.find(D);
9932   if (I != UDMMap.end())
9933     return I->second;
9934   emitUserDefinedMapper(D);
9935   return UDMMap.lookup(D);
9936 }
9937 
9938 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9939     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9940     llvm::Value *DeviceID,
9941     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9942                                      const OMPLoopDirective &D)>
9943         SizeEmitter) {
9944   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9945   const OMPExecutableDirective *TD = &D;
9946   // Get nested teams distribute kind directive, if any.
9947   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9948     TD = getNestedDistributeDirective(CGM.getContext(), D);
9949   if (!TD)
9950     return;
9951   const auto *LD = cast<OMPLoopDirective>(TD);
9952   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9953                                                          PrePostActionTy &) {
9954     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9955       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9956       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9957       CGF.EmitRuntimeCall(
9958           OMPBuilder.getOrCreateRuntimeFunction(
9959               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
9960           Args);
9961     }
9962   };
9963   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9964 }
9965 
9966 void CGOpenMPRuntime::emitTargetCall(
9967     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9968     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9969     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9970     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9971                                      const OMPLoopDirective &D)>
9972         SizeEmitter) {
9973   if (!CGF.HaveInsertPoint())
9974     return;
9975 
9976   assert(OutlinedFn && "Invalid outlined function!");
9977 
9978   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9979                                  D.hasClausesOfKind<OMPNowaitClause>();
9980   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9981   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9982   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9983                                             PrePostActionTy &) {
9984     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9985   };
9986   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9987 
9988   CodeGenFunction::OMPTargetDataInfo InputInfo;
9989   llvm::Value *MapTypesArray = nullptr;
9990   llvm::Value *MapNamesArray = nullptr;
9991   // Fill up the pointer arrays and transfer execution to the device.
9992   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9993                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
9994                     &CapturedVars,
9995                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9996     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9997       // Reverse offloading is not supported, so just execute on the host.
9998       if (RequiresOuterTask) {
9999         CapturedVars.clear();
10000         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10001       }
10002       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10003       return;
10004     }
10005 
10006     // On top of the arrays that were filled up, the target offloading call
10007     // takes as arguments the device id as well as the host pointer. The host
10008     // pointer is used by the runtime library to identify the current target
10009     // region, so it only has to be unique and not necessarily point to
10010     // anything. It could be the pointer to the outlined function that
10011     // implements the target region, but we aren't using that so that the
10012     // compiler doesn't need to keep that, and could therefore inline the host
10013     // function if proven worthwhile during optimization.
10014 
10015     // From this point on, we need to have an ID of the target region defined.
10016     assert(OutlinedFnID && "Invalid outlined function ID!");
10017 
10018     // Emit device ID if any.
10019     llvm::Value *DeviceID;
10020     if (Device.getPointer()) {
10021       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10022               Device.getInt() == OMPC_DEVICE_device_num) &&
10023              "Expected device_num modifier.");
10024       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10025       DeviceID =
10026           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10027     } else {
10028       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10029     }
10030 
10031     // Emit the number of elements in the offloading arrays.
10032     llvm::Value *PointerNum =
10033         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10034 
10035     // Return value of the runtime offloading call.
10036     llvm::Value *Return;
10037 
10038     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10039     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10040 
10041     // Source location for the ident struct
10042     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10043 
10044     // Emit tripcount for the target loop-based directive.
10045     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10046 
10047     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10048     // The target region is an outlined function launched by the runtime
10049     // via calls __tgt_target() or __tgt_target_teams().
10050     //
10051     // __tgt_target() launches a target region with one team and one thread,
10052     // executing a serial region.  This master thread may in turn launch
10053     // more threads within its team upon encountering a parallel region,
10054     // however, no additional teams can be launched on the device.
10055     //
10056     // __tgt_target_teams() launches a target region with one or more teams,
10057     // each with one or more threads.  This call is required for target
10058     // constructs such as:
10059     //  'target teams'
10060     //  'target' / 'teams'
10061     //  'target teams distribute parallel for'
10062     //  'target parallel'
10063     // and so on.
10064     //
10065     // Note that on the host and CPU targets, the runtime implementation of
10066     // these calls simply call the outlined function without forking threads.
10067     // The outlined functions themselves have runtime calls to
10068     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10069     // the compiler in emitTeamsCall() and emitParallelCall().
10070     //
10071     // In contrast, on the NVPTX target, the implementation of
10072     // __tgt_target_teams() launches a GPU kernel with the requested number
10073     // of teams and threads so no additional calls to the runtime are required.
10074     if (NumTeams) {
10075       // If we have NumTeams defined this means that we have an enclosed teams
10076       // region. Therefore we also expect to have NumThreads defined. These two
10077       // values should be defined in the presence of a teams directive,
10078       // regardless of having any clauses associated. If the user is using teams
10079       // but no clauses, these two values will be the default that should be
10080       // passed to the runtime library - a 32-bit integer with the value zero.
10081       assert(NumThreads && "Thread limit expression should be available along "
10082                            "with number of teams.");
10083       llvm::Value *OffloadingArgs[] = {RTLoc,
10084                                        DeviceID,
10085                                        OutlinedFnID,
10086                                        PointerNum,
10087                                        InputInfo.BasePointersArray.getPointer(),
10088                                        InputInfo.PointersArray.getPointer(),
10089                                        InputInfo.SizesArray.getPointer(),
10090                                        MapTypesArray,
10091                                        MapNamesArray,
10092                                        InputInfo.MappersArray.getPointer(),
10093                                        NumTeams,
10094                                        NumThreads};
10095       Return = CGF.EmitRuntimeCall(
10096           OMPBuilder.getOrCreateRuntimeFunction(
10097               CGM.getModule(), HasNowait
10098                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10099                                    : OMPRTL___tgt_target_teams_mapper),
10100           OffloadingArgs);
10101     } else {
10102       llvm::Value *OffloadingArgs[] = {RTLoc,
10103                                        DeviceID,
10104                                        OutlinedFnID,
10105                                        PointerNum,
10106                                        InputInfo.BasePointersArray.getPointer(),
10107                                        InputInfo.PointersArray.getPointer(),
10108                                        InputInfo.SizesArray.getPointer(),
10109                                        MapTypesArray,
10110                                        MapNamesArray,
10111                                        InputInfo.MappersArray.getPointer()};
10112       Return = CGF.EmitRuntimeCall(
10113           OMPBuilder.getOrCreateRuntimeFunction(
10114               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10115                                          : OMPRTL___tgt_target_mapper),
10116           OffloadingArgs);
10117     }
10118 
10119     // Check the error code and execute the host version if required.
10120     llvm::BasicBlock *OffloadFailedBlock =
10121         CGF.createBasicBlock("omp_offload.failed");
10122     llvm::BasicBlock *OffloadContBlock =
10123         CGF.createBasicBlock("omp_offload.cont");
10124     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10125     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10126 
10127     CGF.EmitBlock(OffloadFailedBlock);
10128     if (RequiresOuterTask) {
10129       CapturedVars.clear();
10130       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10131     }
10132     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10133     CGF.EmitBranch(OffloadContBlock);
10134 
10135     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10136   };
10137 
10138   // Notify that the host version must be executed.
10139   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10140                     RequiresOuterTask](CodeGenFunction &CGF,
10141                                        PrePostActionTy &) {
10142     if (RequiresOuterTask) {
10143       CapturedVars.clear();
10144       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10145     }
10146     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10147   };
10148 
10149   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10150                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10151                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10152     // Fill up the arrays with all the captured variables.
10153     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10154 
10155     // Get mappable expression information.
10156     MappableExprsHandler MEHandler(D, CGF);
10157     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10158     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10159 
10160     auto RI = CS.getCapturedRecordDecl()->field_begin();
10161     auto *CV = CapturedVars.begin();
10162     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10163                                               CE = CS.capture_end();
10164          CI != CE; ++CI, ++RI, ++CV) {
10165       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10166       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10167 
10168       // VLA sizes are passed to the outlined region by copy and do not have map
10169       // information associated.
10170       if (CI->capturesVariableArrayType()) {
10171         CurInfo.Exprs.push_back(nullptr);
10172         CurInfo.BasePointers.push_back(*CV);
10173         CurInfo.Pointers.push_back(*CV);
10174         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10175             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10176         // Copy to the device as an argument. No need to retrieve it.
10177         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10178                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10179                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10180         CurInfo.Mappers.push_back(nullptr);
10181       } else {
10182         // If we have any information in the map clause, we use it, otherwise we
10183         // just do a default mapping.
10184         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10185         if (!CI->capturesThis())
10186           MappedVarSet.insert(CI->getCapturedVar());
10187         else
10188           MappedVarSet.insert(nullptr);
10189         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10190           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10191         // Generate correct mapping for variables captured by reference in
10192         // lambdas.
10193         if (CI->capturesVariable())
10194           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10195                                                   CurInfo, LambdaPointers);
10196       }
10197       // We expect to have at least an element of information for this capture.
10198       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10199              "Non-existing map pointer for capture!");
10200       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10201              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10202              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10203              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10204              "Inconsistent map information sizes!");
10205 
10206       // If there is an entry in PartialStruct it means we have a struct with
10207       // individual members mapped. Emit an extra combined entry.
10208       if (PartialStruct.Base.isValid()) {
10209         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10210         MEHandler.emitCombinedEntry(
10211             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10212             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10213       }
10214 
10215       // We need to append the results of this capture to what we already have.
10216       CombinedInfo.append(CurInfo);
10217     }
10218     // Adjust MEMBER_OF flags for the lambdas captures.
10219     MEHandler.adjustMemberOfForLambdaCaptures(
10220         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10221         CombinedInfo.Types);
10222     // Map any list items in a map clause that were not captures because they
10223     // weren't referenced within the construct.
10224     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10225 
10226     TargetDataInfo Info;
10227     // Fill up the arrays and create the arguments.
10228     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10229     emitOffloadingArraysArgument(
10230         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10231         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10232         {/*ForEndTask=*/false});
10233 
10234     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10235     InputInfo.BasePointersArray =
10236         Address(Info.BasePointersArray, CGM.getPointerAlign());
10237     InputInfo.PointersArray =
10238         Address(Info.PointersArray, CGM.getPointerAlign());
10239     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10240     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10241     MapTypesArray = Info.MapTypesArray;
10242     MapNamesArray = Info.MapNamesArray;
10243     if (RequiresOuterTask)
10244       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10245     else
10246       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10247   };
10248 
10249   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10250                              CodeGenFunction &CGF, PrePostActionTy &) {
10251     if (RequiresOuterTask) {
10252       CodeGenFunction::OMPTargetDataInfo InputInfo;
10253       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10254     } else {
10255       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10256     }
10257   };
10258 
10259   // If we have a target function ID it means that we need to support
10260   // offloading, otherwise, just execute on the host. We need to execute on host
10261   // regardless of the conditional in the if clause if, e.g., the user do not
10262   // specify target triples.
10263   if (OutlinedFnID) {
10264     if (IfCond) {
10265       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10266     } else {
10267       RegionCodeGenTy ThenRCG(TargetThenGen);
10268       ThenRCG(CGF);
10269     }
10270   } else {
10271     RegionCodeGenTy ElseRCG(TargetElseGen);
10272     ElseRCG(CGF);
10273   }
10274 }
10275 
10276 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10277                                                     StringRef ParentName) {
10278   if (!S)
10279     return;
10280 
10281   // Codegen OMP target directives that offload compute to the device.
10282   bool RequiresDeviceCodegen =
10283       isa<OMPExecutableDirective>(S) &&
10284       isOpenMPTargetExecutionDirective(
10285           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10286 
10287   if (RequiresDeviceCodegen) {
10288     const auto &E = *cast<OMPExecutableDirective>(S);
10289     unsigned DeviceID;
10290     unsigned FileID;
10291     unsigned Line;
10292     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10293                              FileID, Line);
10294 
10295     // Is this a target region that should not be emitted as an entry point? If
10296     // so just signal we are done with this target region.
10297     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10298                                                             ParentName, Line))
10299       return;
10300 
10301     switch (E.getDirectiveKind()) {
10302     case OMPD_target:
10303       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10304                                                    cast<OMPTargetDirective>(E));
10305       break;
10306     case OMPD_target_parallel:
10307       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10308           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10309       break;
10310     case OMPD_target_teams:
10311       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10312           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10313       break;
10314     case OMPD_target_teams_distribute:
10315       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10316           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10317       break;
10318     case OMPD_target_teams_distribute_simd:
10319       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10320           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10321       break;
10322     case OMPD_target_parallel_for:
10323       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10324           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10325       break;
10326     case OMPD_target_parallel_for_simd:
10327       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10328           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10329       break;
10330     case OMPD_target_simd:
10331       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10332           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10333       break;
10334     case OMPD_target_teams_distribute_parallel_for:
10335       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10336           CGM, ParentName,
10337           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10338       break;
10339     case OMPD_target_teams_distribute_parallel_for_simd:
10340       CodeGenFunction::
10341           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10342               CGM, ParentName,
10343               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10344       break;
10345     case OMPD_parallel:
10346     case OMPD_for:
10347     case OMPD_parallel_for:
10348     case OMPD_parallel_master:
10349     case OMPD_parallel_sections:
10350     case OMPD_for_simd:
10351     case OMPD_parallel_for_simd:
10352     case OMPD_cancel:
10353     case OMPD_cancellation_point:
10354     case OMPD_ordered:
10355     case OMPD_threadprivate:
10356     case OMPD_allocate:
10357     case OMPD_task:
10358     case OMPD_simd:
10359     case OMPD_tile:
10360     case OMPD_sections:
10361     case OMPD_section:
10362     case OMPD_single:
10363     case OMPD_master:
10364     case OMPD_critical:
10365     case OMPD_taskyield:
10366     case OMPD_barrier:
10367     case OMPD_taskwait:
10368     case OMPD_taskgroup:
10369     case OMPD_atomic:
10370     case OMPD_flush:
10371     case OMPD_depobj:
10372     case OMPD_scan:
10373     case OMPD_teams:
10374     case OMPD_target_data:
10375     case OMPD_target_exit_data:
10376     case OMPD_target_enter_data:
10377     case OMPD_distribute:
10378     case OMPD_distribute_simd:
10379     case OMPD_distribute_parallel_for:
10380     case OMPD_distribute_parallel_for_simd:
10381     case OMPD_teams_distribute:
10382     case OMPD_teams_distribute_simd:
10383     case OMPD_teams_distribute_parallel_for:
10384     case OMPD_teams_distribute_parallel_for_simd:
10385     case OMPD_target_update:
10386     case OMPD_declare_simd:
10387     case OMPD_declare_variant:
10388     case OMPD_begin_declare_variant:
10389     case OMPD_end_declare_variant:
10390     case OMPD_declare_target:
10391     case OMPD_end_declare_target:
10392     case OMPD_declare_reduction:
10393     case OMPD_declare_mapper:
10394     case OMPD_taskloop:
10395     case OMPD_taskloop_simd:
10396     case OMPD_master_taskloop:
10397     case OMPD_master_taskloop_simd:
10398     case OMPD_parallel_master_taskloop:
10399     case OMPD_parallel_master_taskloop_simd:
10400     case OMPD_requires:
10401     case OMPD_unknown:
10402     default:
10403       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10404     }
10405     return;
10406   }
10407 
10408   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10409     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10410       return;
10411 
10412     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10413     return;
10414   }
10415 
10416   // If this is a lambda function, look into its body.
10417   if (const auto *L = dyn_cast<LambdaExpr>(S))
10418     S = L->getBody();
10419 
10420   // Keep looking for target regions recursively.
10421   for (const Stmt *II : S->children())
10422     scanForTargetRegionsFunctions(II, ParentName);
10423 }
10424 
10425 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10426   // If emitting code for the host, we do not process FD here. Instead we do
10427   // the normal code generation.
10428   if (!CGM.getLangOpts().OpenMPIsDevice) {
10429     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10430       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10431           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10432       // Do not emit device_type(nohost) functions for the host.
10433       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10434         return true;
10435     }
10436     return false;
10437   }
10438 
10439   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10440   // Try to detect target regions in the function.
10441   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10442     StringRef Name = CGM.getMangledName(GD);
10443     scanForTargetRegionsFunctions(FD->getBody(), Name);
10444     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10445         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10446     // Do not emit device_type(nohost) functions for the host.
10447     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10448       return true;
10449   }
10450 
10451   // Do not to emit function if it is not marked as declare target.
10452   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10453          AlreadyEmittedTargetDecls.count(VD) == 0;
10454 }
10455 
10456 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10457   if (!CGM.getLangOpts().OpenMPIsDevice)
10458     return false;
10459 
10460   // Check if there are Ctors/Dtors in this declaration and look for target
10461   // regions in it. We use the complete variant to produce the kernel name
10462   // mangling.
10463   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10464   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10465     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10466       StringRef ParentName =
10467           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10468       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10469     }
10470     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10471       StringRef ParentName =
10472           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10473       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10474     }
10475   }
10476 
10477   // Do not to emit variable if it is not marked as declare target.
10478   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10479       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10480           cast<VarDecl>(GD.getDecl()));
10481   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10482       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10483        HasRequiresUnifiedSharedMemory)) {
10484     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10485     return true;
10486   }
10487   return false;
10488 }
10489 
10490 llvm::Constant *
10491 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10492                                                 const VarDecl *VD) {
10493   assert(VD->getType().isConstant(CGM.getContext()) &&
10494          "Expected constant variable.");
10495   StringRef VarName;
10496   llvm::Constant *Addr;
10497   llvm::GlobalValue::LinkageTypes Linkage;
10498   QualType Ty = VD->getType();
10499   SmallString<128> Buffer;
10500   {
10501     unsigned DeviceID;
10502     unsigned FileID;
10503     unsigned Line;
10504     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10505                              FileID, Line);
10506     llvm::raw_svector_ostream OS(Buffer);
10507     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10508        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10509     VarName = OS.str();
10510   }
10511   Linkage = llvm::GlobalValue::InternalLinkage;
10512   Addr =
10513       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10514                                   getDefaultFirstprivateAddressSpace());
10515   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10516   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10517   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10518   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10519       VarName, Addr, VarSize,
10520       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10521   return Addr;
10522 }
10523 
10524 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10525                                                    llvm::Constant *Addr) {
10526   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10527       !CGM.getLangOpts().OpenMPIsDevice)
10528     return;
10529   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10530       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10531   if (!Res) {
10532     if (CGM.getLangOpts().OpenMPIsDevice) {
10533       // Register non-target variables being emitted in device code (debug info
10534       // may cause this).
10535       StringRef VarName = CGM.getMangledName(VD);
10536       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10537     }
10538     return;
10539   }
10540   // Register declare target variables.
10541   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10542   StringRef VarName;
10543   CharUnits VarSize;
10544   llvm::GlobalValue::LinkageTypes Linkage;
10545 
10546   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10547       !HasRequiresUnifiedSharedMemory) {
10548     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10549     VarName = CGM.getMangledName(VD);
10550     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10551       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10552       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10553     } else {
10554       VarSize = CharUnits::Zero();
10555     }
10556     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10557     // Temp solution to prevent optimizations of the internal variables.
10558     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10559       std::string RefName = getName({VarName, "ref"});
10560       if (!CGM.GetGlobalValue(RefName)) {
10561         llvm::Constant *AddrRef =
10562             getOrCreateInternalVariable(Addr->getType(), RefName);
10563         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10564         GVAddrRef->setConstant(/*Val=*/true);
10565         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10566         GVAddrRef->setInitializer(Addr);
10567         CGM.addCompilerUsedGlobal(GVAddrRef);
10568       }
10569     }
10570   } else {
10571     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10572             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10573              HasRequiresUnifiedSharedMemory)) &&
10574            "Declare target attribute must link or to with unified memory.");
10575     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10576       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10577     else
10578       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10579 
10580     if (CGM.getLangOpts().OpenMPIsDevice) {
10581       VarName = Addr->getName();
10582       Addr = nullptr;
10583     } else {
10584       VarName = getAddrOfDeclareTargetVar(VD).getName();
10585       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10586     }
10587     VarSize = CGM.getPointerSize();
10588     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10589   }
10590 
10591   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10592       VarName, Addr, VarSize, Flags, Linkage);
10593 }
10594 
10595 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10596   if (isa<FunctionDecl>(GD.getDecl()) ||
10597       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10598     return emitTargetFunctions(GD);
10599 
10600   return emitTargetGlobalVariable(GD);
10601 }
10602 
10603 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10604   for (const VarDecl *VD : DeferredGlobalVariables) {
10605     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10606         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10607     if (!Res)
10608       continue;
10609     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10610         !HasRequiresUnifiedSharedMemory) {
10611       CGM.EmitGlobal(VD);
10612     } else {
10613       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10614               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10615                HasRequiresUnifiedSharedMemory)) &&
10616              "Expected link clause or to clause with unified memory.");
10617       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10618     }
10619   }
10620 }
10621 
10622 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10623     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10624   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10625          " Expected target-based directive.");
10626 }
10627 
10628 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10629   for (const OMPClause *Clause : D->clauselists()) {
10630     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10631       HasRequiresUnifiedSharedMemory = true;
10632     } else if (const auto *AC =
10633                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10634       switch (AC->getAtomicDefaultMemOrderKind()) {
10635       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10636         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10637         break;
10638       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10639         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10640         break;
10641       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10642         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10643         break;
10644       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10645         break;
10646       }
10647     }
10648   }
10649 }
10650 
10651 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10652   return RequiresAtomicOrdering;
10653 }
10654 
10655 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10656                                                        LangAS &AS) {
10657   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10658     return false;
10659   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10660   switch(A->getAllocatorType()) {
10661   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10662   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10663   // Not supported, fallback to the default mem space.
10664   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10665   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10666   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10667   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10668   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10669   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10670   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10671     AS = LangAS::Default;
10672     return true;
10673   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10674     llvm_unreachable("Expected predefined allocator for the variables with the "
10675                      "static storage.");
10676   }
10677   return false;
10678 }
10679 
10680 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10681   return HasRequiresUnifiedSharedMemory;
10682 }
10683 
10684 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10685     CodeGenModule &CGM)
10686     : CGM(CGM) {
10687   if (CGM.getLangOpts().OpenMPIsDevice) {
10688     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10689     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10690   }
10691 }
10692 
10693 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10694   if (CGM.getLangOpts().OpenMPIsDevice)
10695     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10696 }
10697 
10698 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10699   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10700     return true;
10701 
10702   const auto *D = cast<FunctionDecl>(GD.getDecl());
10703   // Do not to emit function if it is marked as declare target as it was already
10704   // emitted.
10705   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10706     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10707       if (auto *F = dyn_cast_or_null<llvm::Function>(
10708               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10709         return !F->isDeclaration();
10710       return false;
10711     }
10712     return true;
10713   }
10714 
10715   return !AlreadyEmittedTargetDecls.insert(D).second;
10716 }
10717 
10718 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10719   // If we don't have entries or if we are emitting code for the device, we
10720   // don't need to do anything.
10721   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10722       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10723       (OffloadEntriesInfoManager.empty() &&
10724        !HasEmittedDeclareTargetRegion &&
10725        !HasEmittedTargetRegion))
10726     return nullptr;
10727 
10728   // Create and register the function that handles the requires directives.
10729   ASTContext &C = CGM.getContext();
10730 
10731   llvm::Function *RequiresRegFn;
10732   {
10733     CodeGenFunction CGF(CGM);
10734     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10735     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10736     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10737     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10738     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10739     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10740     // TODO: check for other requires clauses.
10741     // The requires directive takes effect only when a target region is
10742     // present in the compilation unit. Otherwise it is ignored and not
10743     // passed to the runtime. This avoids the runtime from throwing an error
10744     // for mismatching requires clauses across compilation units that don't
10745     // contain at least 1 target region.
10746     assert((HasEmittedTargetRegion ||
10747             HasEmittedDeclareTargetRegion ||
10748             !OffloadEntriesInfoManager.empty()) &&
10749            "Target or declare target region expected.");
10750     if (HasRequiresUnifiedSharedMemory)
10751       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10752     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10753                             CGM.getModule(), OMPRTL___tgt_register_requires),
10754                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10755     CGF.FinishFunction();
10756   }
10757   return RequiresRegFn;
10758 }
10759 
10760 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10761                                     const OMPExecutableDirective &D,
10762                                     SourceLocation Loc,
10763                                     llvm::Function *OutlinedFn,
10764                                     ArrayRef<llvm::Value *> CapturedVars) {
10765   if (!CGF.HaveInsertPoint())
10766     return;
10767 
10768   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10769   CodeGenFunction::RunCleanupsScope Scope(CGF);
10770 
10771   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10772   llvm::Value *Args[] = {
10773       RTLoc,
10774       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10775       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10776   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10777   RealArgs.append(std::begin(Args), std::end(Args));
10778   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10779 
10780   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10781       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10782   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10783 }
10784 
10785 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10786                                          const Expr *NumTeams,
10787                                          const Expr *ThreadLimit,
10788                                          SourceLocation Loc) {
10789   if (!CGF.HaveInsertPoint())
10790     return;
10791 
10792   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10793 
10794   llvm::Value *NumTeamsVal =
10795       NumTeams
10796           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10797                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10798           : CGF.Builder.getInt32(0);
10799 
10800   llvm::Value *ThreadLimitVal =
10801       ThreadLimit
10802           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10803                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10804           : CGF.Builder.getInt32(0);
10805 
10806   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10807   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10808                                      ThreadLimitVal};
10809   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10810                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10811                       PushNumTeamsArgs);
10812 }
10813 
10814 void CGOpenMPRuntime::emitTargetDataCalls(
10815     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10816     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10817   if (!CGF.HaveInsertPoint())
10818     return;
10819 
10820   // Action used to replace the default codegen action and turn privatization
10821   // off.
10822   PrePostActionTy NoPrivAction;
10823 
10824   // Generate the code for the opening of the data environment. Capture all the
10825   // arguments of the runtime call by reference because they are used in the
10826   // closing of the region.
10827   auto &&BeginThenGen = [this, &D, Device, &Info,
10828                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10829     // Fill up the arrays with all the mapped variables.
10830     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10831 
10832     // Get map clause information.
10833     MappableExprsHandler MEHandler(D, CGF);
10834     MEHandler.generateAllInfo(CombinedInfo);
10835 
10836     // Fill up the arrays and create the arguments.
10837     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10838                          /*IsNonContiguous=*/true);
10839 
10840     llvm::Value *BasePointersArrayArg = nullptr;
10841     llvm::Value *PointersArrayArg = nullptr;
10842     llvm::Value *SizesArrayArg = nullptr;
10843     llvm::Value *MapTypesArrayArg = nullptr;
10844     llvm::Value *MapNamesArrayArg = nullptr;
10845     llvm::Value *MappersArrayArg = nullptr;
10846     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10847                                  SizesArrayArg, MapTypesArrayArg,
10848                                  MapNamesArrayArg, MappersArrayArg, Info);
10849 
10850     // Emit device ID if any.
10851     llvm::Value *DeviceID = nullptr;
10852     if (Device) {
10853       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10854                                            CGF.Int64Ty, /*isSigned=*/true);
10855     } else {
10856       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10857     }
10858 
10859     // Emit the number of elements in the offloading arrays.
10860     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10861     //
10862     // Source location for the ident struct
10863     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10864 
10865     llvm::Value *OffloadingArgs[] = {RTLoc,
10866                                      DeviceID,
10867                                      PointerNum,
10868                                      BasePointersArrayArg,
10869                                      PointersArrayArg,
10870                                      SizesArrayArg,
10871                                      MapTypesArrayArg,
10872                                      MapNamesArrayArg,
10873                                      MappersArrayArg};
10874     CGF.EmitRuntimeCall(
10875         OMPBuilder.getOrCreateRuntimeFunction(
10876             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10877         OffloadingArgs);
10878 
10879     // If device pointer privatization is required, emit the body of the region
10880     // here. It will have to be duplicated: with and without privatization.
10881     if (!Info.CaptureDeviceAddrMap.empty())
10882       CodeGen(CGF);
10883   };
10884 
10885   // Generate code for the closing of the data region.
10886   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10887                                                 PrePostActionTy &) {
10888     assert(Info.isValid() && "Invalid data environment closing arguments.");
10889 
10890     llvm::Value *BasePointersArrayArg = nullptr;
10891     llvm::Value *PointersArrayArg = nullptr;
10892     llvm::Value *SizesArrayArg = nullptr;
10893     llvm::Value *MapTypesArrayArg = nullptr;
10894     llvm::Value *MapNamesArrayArg = nullptr;
10895     llvm::Value *MappersArrayArg = nullptr;
10896     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10897                                  SizesArrayArg, MapTypesArrayArg,
10898                                  MapNamesArrayArg, MappersArrayArg, Info,
10899                                  {/*ForEndCall=*/true});
10900 
10901     // Emit device ID if any.
10902     llvm::Value *DeviceID = nullptr;
10903     if (Device) {
10904       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10905                                            CGF.Int64Ty, /*isSigned=*/true);
10906     } else {
10907       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10908     }
10909 
10910     // Emit the number of elements in the offloading arrays.
10911     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10912 
10913     // Source location for the ident struct
10914     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10915 
10916     llvm::Value *OffloadingArgs[] = {RTLoc,
10917                                      DeviceID,
10918                                      PointerNum,
10919                                      BasePointersArrayArg,
10920                                      PointersArrayArg,
10921                                      SizesArrayArg,
10922                                      MapTypesArrayArg,
10923                                      MapNamesArrayArg,
10924                                      MappersArrayArg};
10925     CGF.EmitRuntimeCall(
10926         OMPBuilder.getOrCreateRuntimeFunction(
10927             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10928         OffloadingArgs);
10929   };
10930 
10931   // If we need device pointer privatization, we need to emit the body of the
10932   // region with no privatization in the 'else' branch of the conditional.
10933   // Otherwise, we don't have to do anything.
10934   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10935                                                          PrePostActionTy &) {
10936     if (!Info.CaptureDeviceAddrMap.empty()) {
10937       CodeGen.setAction(NoPrivAction);
10938       CodeGen(CGF);
10939     }
10940   };
10941 
10942   // We don't have to do anything to close the region if the if clause evaluates
10943   // to false.
10944   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10945 
10946   if (IfCond) {
10947     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10948   } else {
10949     RegionCodeGenTy RCG(BeginThenGen);
10950     RCG(CGF);
10951   }
10952 
10953   // If we don't require privatization of device pointers, we emit the body in
10954   // between the runtime calls. This avoids duplicating the body code.
10955   if (Info.CaptureDeviceAddrMap.empty()) {
10956     CodeGen.setAction(NoPrivAction);
10957     CodeGen(CGF);
10958   }
10959 
10960   if (IfCond) {
10961     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10962   } else {
10963     RegionCodeGenTy RCG(EndThenGen);
10964     RCG(CGF);
10965   }
10966 }
10967 
10968 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10969     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10970     const Expr *Device) {
10971   if (!CGF.HaveInsertPoint())
10972     return;
10973 
10974   assert((isa<OMPTargetEnterDataDirective>(D) ||
10975           isa<OMPTargetExitDataDirective>(D) ||
10976           isa<OMPTargetUpdateDirective>(D)) &&
10977          "Expecting either target enter, exit data, or update directives.");
10978 
10979   CodeGenFunction::OMPTargetDataInfo InputInfo;
10980   llvm::Value *MapTypesArray = nullptr;
10981   llvm::Value *MapNamesArray = nullptr;
10982   // Generate the code for the opening of the data environment.
10983   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10984                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10985     // Emit device ID if any.
10986     llvm::Value *DeviceID = nullptr;
10987     if (Device) {
10988       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10989                                            CGF.Int64Ty, /*isSigned=*/true);
10990     } else {
10991       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10992     }
10993 
10994     // Emit the number of elements in the offloading arrays.
10995     llvm::Constant *PointerNum =
10996         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10997 
10998     // Source location for the ident struct
10999     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11000 
11001     llvm::Value *OffloadingArgs[] = {RTLoc,
11002                                      DeviceID,
11003                                      PointerNum,
11004                                      InputInfo.BasePointersArray.getPointer(),
11005                                      InputInfo.PointersArray.getPointer(),
11006                                      InputInfo.SizesArray.getPointer(),
11007                                      MapTypesArray,
11008                                      MapNamesArray,
11009                                      InputInfo.MappersArray.getPointer()};
11010 
11011     // Select the right runtime function call for each standalone
11012     // directive.
11013     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11014     RuntimeFunction RTLFn;
11015     switch (D.getDirectiveKind()) {
11016     case OMPD_target_enter_data:
11017       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11018                         : OMPRTL___tgt_target_data_begin_mapper;
11019       break;
11020     case OMPD_target_exit_data:
11021       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11022                         : OMPRTL___tgt_target_data_end_mapper;
11023       break;
11024     case OMPD_target_update:
11025       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11026                         : OMPRTL___tgt_target_data_update_mapper;
11027       break;
11028     case OMPD_parallel:
11029     case OMPD_for:
11030     case OMPD_parallel_for:
11031     case OMPD_parallel_master:
11032     case OMPD_parallel_sections:
11033     case OMPD_for_simd:
11034     case OMPD_parallel_for_simd:
11035     case OMPD_cancel:
11036     case OMPD_cancellation_point:
11037     case OMPD_ordered:
11038     case OMPD_threadprivate:
11039     case OMPD_allocate:
11040     case OMPD_task:
11041     case OMPD_simd:
11042     case OMPD_tile:
11043     case OMPD_sections:
11044     case OMPD_section:
11045     case OMPD_single:
11046     case OMPD_master:
11047     case OMPD_critical:
11048     case OMPD_taskyield:
11049     case OMPD_barrier:
11050     case OMPD_taskwait:
11051     case OMPD_taskgroup:
11052     case OMPD_atomic:
11053     case OMPD_flush:
11054     case OMPD_depobj:
11055     case OMPD_scan:
11056     case OMPD_teams:
11057     case OMPD_target_data:
11058     case OMPD_distribute:
11059     case OMPD_distribute_simd:
11060     case OMPD_distribute_parallel_for:
11061     case OMPD_distribute_parallel_for_simd:
11062     case OMPD_teams_distribute:
11063     case OMPD_teams_distribute_simd:
11064     case OMPD_teams_distribute_parallel_for:
11065     case OMPD_teams_distribute_parallel_for_simd:
11066     case OMPD_declare_simd:
11067     case OMPD_declare_variant:
11068     case OMPD_begin_declare_variant:
11069     case OMPD_end_declare_variant:
11070     case OMPD_declare_target:
11071     case OMPD_end_declare_target:
11072     case OMPD_declare_reduction:
11073     case OMPD_declare_mapper:
11074     case OMPD_taskloop:
11075     case OMPD_taskloop_simd:
11076     case OMPD_master_taskloop:
11077     case OMPD_master_taskloop_simd:
11078     case OMPD_parallel_master_taskloop:
11079     case OMPD_parallel_master_taskloop_simd:
11080     case OMPD_target:
11081     case OMPD_target_simd:
11082     case OMPD_target_teams_distribute:
11083     case OMPD_target_teams_distribute_simd:
11084     case OMPD_target_teams_distribute_parallel_for:
11085     case OMPD_target_teams_distribute_parallel_for_simd:
11086     case OMPD_target_teams:
11087     case OMPD_target_parallel:
11088     case OMPD_target_parallel_for:
11089     case OMPD_target_parallel_for_simd:
11090     case OMPD_requires:
11091     case OMPD_unknown:
11092     default:
11093       llvm_unreachable("Unexpected standalone target data directive.");
11094       break;
11095     }
11096     CGF.EmitRuntimeCall(
11097         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11098         OffloadingArgs);
11099   };
11100 
11101   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11102                           &MapNamesArray](CodeGenFunction &CGF,
11103                                           PrePostActionTy &) {
11104     // Fill up the arrays with all the mapped variables.
11105     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11106 
11107     // Get map clause information.
11108     MappableExprsHandler MEHandler(D, CGF);
11109     MEHandler.generateAllInfo(CombinedInfo);
11110 
11111     TargetDataInfo Info;
11112     // Fill up the arrays and create the arguments.
11113     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11114                          /*IsNonContiguous=*/true);
11115     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11116                              D.hasClausesOfKind<OMPNowaitClause>();
11117     emitOffloadingArraysArgument(
11118         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11119         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11120         {/*ForEndTask=*/false});
11121     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11122     InputInfo.BasePointersArray =
11123         Address(Info.BasePointersArray, CGM.getPointerAlign());
11124     InputInfo.PointersArray =
11125         Address(Info.PointersArray, CGM.getPointerAlign());
11126     InputInfo.SizesArray =
11127         Address(Info.SizesArray, CGM.getPointerAlign());
11128     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11129     MapTypesArray = Info.MapTypesArray;
11130     MapNamesArray = Info.MapNamesArray;
11131     if (RequiresOuterTask)
11132       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11133     else
11134       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11135   };
11136 
11137   if (IfCond) {
11138     emitIfClause(CGF, IfCond, TargetThenGen,
11139                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11140   } else {
11141     RegionCodeGenTy ThenRCG(TargetThenGen);
11142     ThenRCG(CGF);
11143   }
11144 }
11145 
11146 namespace {
11147   /// Kind of parameter in a function with 'declare simd' directive.
11148   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11149   /// Attribute set of the parameter.
11150   struct ParamAttrTy {
11151     ParamKindTy Kind = Vector;
11152     llvm::APSInt StrideOrArg;
11153     llvm::APSInt Alignment;
11154   };
11155 } // namespace
11156 
11157 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11158                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11159   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11160   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11161   // of that clause. The VLEN value must be power of 2.
11162   // In other case the notion of the function`s "characteristic data type" (CDT)
11163   // is used to compute the vector length.
11164   // CDT is defined in the following order:
11165   //   a) For non-void function, the CDT is the return type.
11166   //   b) If the function has any non-uniform, non-linear parameters, then the
11167   //   CDT is the type of the first such parameter.
11168   //   c) If the CDT determined by a) or b) above is struct, union, or class
11169   //   type which is pass-by-value (except for the type that maps to the
11170   //   built-in complex data type), the characteristic data type is int.
11171   //   d) If none of the above three cases is applicable, the CDT is int.
11172   // The VLEN is then determined based on the CDT and the size of vector
11173   // register of that ISA for which current vector version is generated. The
11174   // VLEN is computed using the formula below:
11175   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11176   // where vector register size specified in section 3.2.1 Registers and the
11177   // Stack Frame of original AMD64 ABI document.
11178   QualType RetType = FD->getReturnType();
11179   if (RetType.isNull())
11180     return 0;
11181   ASTContext &C = FD->getASTContext();
11182   QualType CDT;
11183   if (!RetType.isNull() && !RetType->isVoidType()) {
11184     CDT = RetType;
11185   } else {
11186     unsigned Offset = 0;
11187     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11188       if (ParamAttrs[Offset].Kind == Vector)
11189         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11190       ++Offset;
11191     }
11192     if (CDT.isNull()) {
11193       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11194         if (ParamAttrs[I + Offset].Kind == Vector) {
11195           CDT = FD->getParamDecl(I)->getType();
11196           break;
11197         }
11198       }
11199     }
11200   }
11201   if (CDT.isNull())
11202     CDT = C.IntTy;
11203   CDT = CDT->getCanonicalTypeUnqualified();
11204   if (CDT->isRecordType() || CDT->isUnionType())
11205     CDT = C.IntTy;
11206   return C.getTypeSize(CDT);
11207 }
11208 
11209 static void
11210 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11211                            const llvm::APSInt &VLENVal,
11212                            ArrayRef<ParamAttrTy> ParamAttrs,
11213                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11214   struct ISADataTy {
11215     char ISA;
11216     unsigned VecRegSize;
11217   };
11218   ISADataTy ISAData[] = {
11219       {
11220           'b', 128
11221       }, // SSE
11222       {
11223           'c', 256
11224       }, // AVX
11225       {
11226           'd', 256
11227       }, // AVX2
11228       {
11229           'e', 512
11230       }, // AVX512
11231   };
11232   llvm::SmallVector<char, 2> Masked;
11233   switch (State) {
11234   case OMPDeclareSimdDeclAttr::BS_Undefined:
11235     Masked.push_back('N');
11236     Masked.push_back('M');
11237     break;
11238   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11239     Masked.push_back('N');
11240     break;
11241   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11242     Masked.push_back('M');
11243     break;
11244   }
11245   for (char Mask : Masked) {
11246     for (const ISADataTy &Data : ISAData) {
11247       SmallString<256> Buffer;
11248       llvm::raw_svector_ostream Out(Buffer);
11249       Out << "_ZGV" << Data.ISA << Mask;
11250       if (!VLENVal) {
11251         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11252         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11253         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11254       } else {
11255         Out << VLENVal;
11256       }
11257       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11258         switch (ParamAttr.Kind){
11259         case LinearWithVarStride:
11260           Out << 's' << ParamAttr.StrideOrArg;
11261           break;
11262         case Linear:
11263           Out << 'l';
11264           if (ParamAttr.StrideOrArg != 1)
11265             Out << ParamAttr.StrideOrArg;
11266           break;
11267         case Uniform:
11268           Out << 'u';
11269           break;
11270         case Vector:
11271           Out << 'v';
11272           break;
11273         }
11274         if (!!ParamAttr.Alignment)
11275           Out << 'a' << ParamAttr.Alignment;
11276       }
11277       Out << '_' << Fn->getName();
11278       Fn->addFnAttr(Out.str());
11279     }
11280   }
11281 }
11282 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11288 
11289 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11290 ///
11291 /// TODO: Need to implement the behavior for reference marked with a
11292 /// var or no linear modifiers (1.b in the section). For this, we
11293 /// need to extend ParamKindTy to support the linear modifiers.
11294 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11295   QT = QT.getCanonicalType();
11296 
11297   if (QT->isVoidType())
11298     return false;
11299 
11300   if (Kind == ParamKindTy::Uniform)
11301     return false;
11302 
11303   if (Kind == ParamKindTy::Linear)
11304     return false;
11305 
11306   // TODO: Handle linear references with modifiers
11307 
11308   if (Kind == ParamKindTy::LinearWithVarStride)
11309     return false;
11310 
11311   return true;
11312 }
11313 
11314 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11315 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11316   QT = QT.getCanonicalType();
11317   unsigned Size = C.getTypeSize(QT);
11318 
11319   // Only scalars and complex within 16 bytes wide set PVB to true.
11320   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11321     return false;
11322 
11323   if (QT->isFloatingType())
11324     return true;
11325 
11326   if (QT->isIntegerType())
11327     return true;
11328 
11329   if (QT->isPointerType())
11330     return true;
11331 
11332   // TODO: Add support for complex types (section 3.1.2, item 2).
11333 
11334   return false;
11335 }
11336 
11337 /// Computes the lane size (LS) of a return type or of an input parameter,
11338 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11339 /// TODO: Add support for references, section 3.2.1, item 1.
11340 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11341   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11342     QualType PTy = QT.getCanonicalType()->getPointeeType();
11343     if (getAArch64PBV(PTy, C))
11344       return C.getTypeSize(PTy);
11345   }
11346   if (getAArch64PBV(QT, C))
11347     return C.getTypeSize(QT);
11348 
11349   return C.getTypeSize(C.getUIntPtrType());
11350 }
11351 
11352 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11353 // signature of the scalar function, as defined in 3.2.2 of the
11354 // AAVFABI.
11355 static std::tuple<unsigned, unsigned, bool>
11356 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11357   QualType RetType = FD->getReturnType().getCanonicalType();
11358 
11359   ASTContext &C = FD->getASTContext();
11360 
11361   bool OutputBecomesInput = false;
11362 
11363   llvm::SmallVector<unsigned, 8> Sizes;
11364   if (!RetType->isVoidType()) {
11365     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11366     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11367       OutputBecomesInput = true;
11368   }
11369   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11370     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11371     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11372   }
11373 
11374   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11375   // The LS of a function parameter / return value can only be a power
11376   // of 2, starting from 8 bits, up to 128.
11377   assert(std::all_of(Sizes.begin(), Sizes.end(),
11378                      [](unsigned Size) {
11379                        return Size == 8 || Size == 16 || Size == 32 ||
11380                               Size == 64 || Size == 128;
11381                      }) &&
11382          "Invalid size");
11383 
11384   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11385                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11386                          OutputBecomesInput);
11387 }
11388 
11389 /// Mangle the parameter part of the vector function name according to
11390 /// their OpenMP classification. The mangling function is defined in
11391 /// section 3.5 of the AAVFABI.
11392 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11393   SmallString<256> Buffer;
11394   llvm::raw_svector_ostream Out(Buffer);
11395   for (const auto &ParamAttr : ParamAttrs) {
11396     switch (ParamAttr.Kind) {
11397     case LinearWithVarStride:
11398       Out << "ls" << ParamAttr.StrideOrArg;
11399       break;
11400     case Linear:
11401       Out << 'l';
11402       // Don't print the step value if it is not present or if it is
11403       // equal to 1.
11404       if (ParamAttr.StrideOrArg != 1)
11405         Out << ParamAttr.StrideOrArg;
11406       break;
11407     case Uniform:
11408       Out << 'u';
11409       break;
11410     case Vector:
11411       Out << 'v';
11412       break;
11413     }
11414 
11415     if (!!ParamAttr.Alignment)
11416       Out << 'a' << ParamAttr.Alignment;
11417   }
11418 
11419   return std::string(Out.str());
11420 }
11421 
11422 // Function used to add the attribute. The parameter `VLEN` is
11423 // templated to allow the use of "x" when targeting scalable functions
11424 // for SVE.
11425 template <typename T>
11426 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11427                                  char ISA, StringRef ParSeq,
11428                                  StringRef MangledName, bool OutputBecomesInput,
11429                                  llvm::Function *Fn) {
11430   SmallString<256> Buffer;
11431   llvm::raw_svector_ostream Out(Buffer);
11432   Out << Prefix << ISA << LMask << VLEN;
11433   if (OutputBecomesInput)
11434     Out << "v";
11435   Out << ParSeq << "_" << MangledName;
11436   Fn->addFnAttr(Out.str());
11437 }
11438 
11439 // Helper function to generate the Advanced SIMD names depending on
11440 // the value of the NDS when simdlen is not present.
11441 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11442                                       StringRef Prefix, char ISA,
11443                                       StringRef ParSeq, StringRef MangledName,
11444                                       bool OutputBecomesInput,
11445                                       llvm::Function *Fn) {
11446   switch (NDS) {
11447   case 8:
11448     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11449                          OutputBecomesInput, Fn);
11450     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11451                          OutputBecomesInput, Fn);
11452     break;
11453   case 16:
11454     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11455                          OutputBecomesInput, Fn);
11456     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11457                          OutputBecomesInput, Fn);
11458     break;
11459   case 32:
11460     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11461                          OutputBecomesInput, Fn);
11462     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11463                          OutputBecomesInput, Fn);
11464     break;
11465   case 64:
11466   case 128:
11467     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11468                          OutputBecomesInput, Fn);
11469     break;
11470   default:
11471     llvm_unreachable("Scalar type is too wide.");
11472   }
11473 }
11474 
11475 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11476 static void emitAArch64DeclareSimdFunction(
11477     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11478     ArrayRef<ParamAttrTy> ParamAttrs,
11479     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11480     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11481 
11482   // Get basic data for building the vector signature.
11483   const auto Data = getNDSWDS(FD, ParamAttrs);
11484   const unsigned NDS = std::get<0>(Data);
11485   const unsigned WDS = std::get<1>(Data);
11486   const bool OutputBecomesInput = std::get<2>(Data);
11487 
11488   // Check the values provided via `simdlen` by the user.
11489   // 1. A `simdlen(1)` doesn't produce vector signatures,
11490   if (UserVLEN == 1) {
11491     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11492         DiagnosticsEngine::Warning,
11493         "The clause simdlen(1) has no effect when targeting aarch64.");
11494     CGM.getDiags().Report(SLoc, DiagID);
11495     return;
11496   }
11497 
11498   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11499   // Advanced SIMD output.
11500   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11501     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11502         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11503                                     "power of 2 when targeting Advanced SIMD.");
11504     CGM.getDiags().Report(SLoc, DiagID);
11505     return;
11506   }
11507 
11508   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11509   // limits.
11510   if (ISA == 's' && UserVLEN != 0) {
11511     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11512       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11513           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11514                                       "lanes in the architectural constraints "
11515                                       "for SVE (min is 128-bit, max is "
11516                                       "2048-bit, by steps of 128-bit)");
11517       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11518       return;
11519     }
11520   }
11521 
11522   // Sort out parameter sequence.
11523   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11524   StringRef Prefix = "_ZGV";
11525   // Generate simdlen from user input (if any).
11526   if (UserVLEN) {
11527     if (ISA == 's') {
11528       // SVE generates only a masked function.
11529       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11530                            OutputBecomesInput, Fn);
11531     } else {
11532       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11533       // Advanced SIMD generates one or two functions, depending on
11534       // the `[not]inbranch` clause.
11535       switch (State) {
11536       case OMPDeclareSimdDeclAttr::BS_Undefined:
11537         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11538                              OutputBecomesInput, Fn);
11539         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11540                              OutputBecomesInput, Fn);
11541         break;
11542       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11543         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11544                              OutputBecomesInput, Fn);
11545         break;
11546       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11547         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11548                              OutputBecomesInput, Fn);
11549         break;
11550       }
11551     }
11552   } else {
11553     // If no user simdlen is provided, follow the AAVFABI rules for
11554     // generating the vector length.
11555     if (ISA == 's') {
11556       // SVE, section 3.4.1, item 1.
11557       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11558                            OutputBecomesInput, Fn);
11559     } else {
11560       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11561       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11562       // two vector names depending on the use of the clause
11563       // `[not]inbranch`.
11564       switch (State) {
11565       case OMPDeclareSimdDeclAttr::BS_Undefined:
11566         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11567                                   OutputBecomesInput, Fn);
11568         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11569                                   OutputBecomesInput, Fn);
11570         break;
11571       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11572         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11573                                   OutputBecomesInput, Fn);
11574         break;
11575       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11576         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11577                                   OutputBecomesInput, Fn);
11578         break;
11579       }
11580     }
11581   }
11582 }
11583 
11584 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11585                                               llvm::Function *Fn) {
11586   ASTContext &C = CGM.getContext();
11587   FD = FD->getMostRecentDecl();
11588   // Map params to their positions in function decl.
11589   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11590   if (isa<CXXMethodDecl>(FD))
11591     ParamPositions.try_emplace(FD, 0);
11592   unsigned ParamPos = ParamPositions.size();
11593   for (const ParmVarDecl *P : FD->parameters()) {
11594     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11595     ++ParamPos;
11596   }
11597   while (FD) {
11598     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11599       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11600       // Mark uniform parameters.
11601       for (const Expr *E : Attr->uniforms()) {
11602         E = E->IgnoreParenImpCasts();
11603         unsigned Pos;
11604         if (isa<CXXThisExpr>(E)) {
11605           Pos = ParamPositions[FD];
11606         } else {
11607           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11608                                 ->getCanonicalDecl();
11609           Pos = ParamPositions[PVD];
11610         }
11611         ParamAttrs[Pos].Kind = Uniform;
11612       }
11613       // Get alignment info.
11614       auto NI = Attr->alignments_begin();
11615       for (const Expr *E : Attr->aligneds()) {
11616         E = E->IgnoreParenImpCasts();
11617         unsigned Pos;
11618         QualType ParmTy;
11619         if (isa<CXXThisExpr>(E)) {
11620           Pos = ParamPositions[FD];
11621           ParmTy = E->getType();
11622         } else {
11623           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11624                                 ->getCanonicalDecl();
11625           Pos = ParamPositions[PVD];
11626           ParmTy = PVD->getType();
11627         }
11628         ParamAttrs[Pos].Alignment =
11629             (*NI)
11630                 ? (*NI)->EvaluateKnownConstInt(C)
11631                 : llvm::APSInt::getUnsigned(
11632                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11633                           .getQuantity());
11634         ++NI;
11635       }
11636       // Mark linear parameters.
11637       auto SI = Attr->steps_begin();
11638       auto MI = Attr->modifiers_begin();
11639       for (const Expr *E : Attr->linears()) {
11640         E = E->IgnoreParenImpCasts();
11641         unsigned Pos;
11642         // Rescaling factor needed to compute the linear parameter
11643         // value in the mangled name.
11644         unsigned PtrRescalingFactor = 1;
11645         if (isa<CXXThisExpr>(E)) {
11646           Pos = ParamPositions[FD];
11647         } else {
11648           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11649                                 ->getCanonicalDecl();
11650           Pos = ParamPositions[PVD];
11651           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11652             PtrRescalingFactor = CGM.getContext()
11653                                      .getTypeSizeInChars(P->getPointeeType())
11654                                      .getQuantity();
11655         }
11656         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11657         ParamAttr.Kind = Linear;
11658         // Assuming a stride of 1, for `linear` without modifiers.
11659         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11660         if (*SI) {
11661           Expr::EvalResult Result;
11662           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11663             if (const auto *DRE =
11664                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11665               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11666                 ParamAttr.Kind = LinearWithVarStride;
11667                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11668                     ParamPositions[StridePVD->getCanonicalDecl()]);
11669               }
11670             }
11671           } else {
11672             ParamAttr.StrideOrArg = Result.Val.getInt();
11673           }
11674         }
11675         // If we are using a linear clause on a pointer, we need to
11676         // rescale the value of linear_step with the byte size of the
11677         // pointee type.
11678         if (Linear == ParamAttr.Kind)
11679           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11680         ++SI;
11681         ++MI;
11682       }
11683       llvm::APSInt VLENVal;
11684       SourceLocation ExprLoc;
11685       const Expr *VLENExpr = Attr->getSimdlen();
11686       if (VLENExpr) {
11687         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11688         ExprLoc = VLENExpr->getExprLoc();
11689       }
11690       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11691       if (CGM.getTriple().isX86()) {
11692         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11693       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11694         unsigned VLEN = VLENVal.getExtValue();
11695         StringRef MangledName = Fn->getName();
11696         if (CGM.getTarget().hasFeature("sve"))
11697           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11698                                          MangledName, 's', 128, Fn, ExprLoc);
11699         if (CGM.getTarget().hasFeature("neon"))
11700           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11701                                          MangledName, 'n', 128, Fn, ExprLoc);
11702       }
11703     }
11704     FD = FD->getPreviousDecl();
11705   }
11706 }
11707 
11708 namespace {
11709 /// Cleanup action for doacross support.
11710 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11711 public:
11712   static const int DoacrossFinArgs = 2;
11713 
11714 private:
11715   llvm::FunctionCallee RTLFn;
11716   llvm::Value *Args[DoacrossFinArgs];
11717 
11718 public:
11719   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11720                     ArrayRef<llvm::Value *> CallArgs)
11721       : RTLFn(RTLFn) {
11722     assert(CallArgs.size() == DoacrossFinArgs);
11723     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11724   }
11725   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11726     if (!CGF.HaveInsertPoint())
11727       return;
11728     CGF.EmitRuntimeCall(RTLFn, Args);
11729   }
11730 };
11731 } // namespace
11732 
11733 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11734                                        const OMPLoopDirective &D,
11735                                        ArrayRef<Expr *> NumIterations) {
11736   if (!CGF.HaveInsertPoint())
11737     return;
11738 
11739   ASTContext &C = CGM.getContext();
11740   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11741   RecordDecl *RD;
11742   if (KmpDimTy.isNull()) {
11743     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11744     //  kmp_int64 lo; // lower
11745     //  kmp_int64 up; // upper
11746     //  kmp_int64 st; // stride
11747     // };
11748     RD = C.buildImplicitRecord("kmp_dim");
11749     RD->startDefinition();
11750     addFieldToRecordDecl(C, RD, Int64Ty);
11751     addFieldToRecordDecl(C, RD, Int64Ty);
11752     addFieldToRecordDecl(C, RD, Int64Ty);
11753     RD->completeDefinition();
11754     KmpDimTy = C.getRecordType(RD);
11755   } else {
11756     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11757   }
11758   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11759   QualType ArrayTy =
11760       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11761 
11762   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11763   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11764   enum { LowerFD = 0, UpperFD, StrideFD };
11765   // Fill dims with data.
11766   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11767     LValue DimsLVal = CGF.MakeAddrLValue(
11768         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11769     // dims.upper = num_iterations;
11770     LValue UpperLVal = CGF.EmitLValueForField(
11771         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11772     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11773         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11774         Int64Ty, NumIterations[I]->getExprLoc());
11775     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11776     // dims.stride = 1;
11777     LValue StrideLVal = CGF.EmitLValueForField(
11778         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11779     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11780                           StrideLVal);
11781   }
11782 
11783   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11784   // kmp_int32 num_dims, struct kmp_dim * dims);
11785   llvm::Value *Args[] = {
11786       emitUpdateLocation(CGF, D.getBeginLoc()),
11787       getThreadID(CGF, D.getBeginLoc()),
11788       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11789       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11790           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11791           CGM.VoidPtrTy)};
11792 
11793   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11794       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11795   CGF.EmitRuntimeCall(RTLFn, Args);
11796   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11797       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11798   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11799       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11800   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11801                                              llvm::makeArrayRef(FiniArgs));
11802 }
11803 
11804 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11805                                           const OMPDependClause *C) {
11806   QualType Int64Ty =
11807       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11808   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11809   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11810       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11811   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11812   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11813     const Expr *CounterVal = C->getLoopData(I);
11814     assert(CounterVal);
11815     llvm::Value *CntVal = CGF.EmitScalarConversion(
11816         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11817         CounterVal->getExprLoc());
11818     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11819                           /*Volatile=*/false, Int64Ty);
11820   }
11821   llvm::Value *Args[] = {
11822       emitUpdateLocation(CGF, C->getBeginLoc()),
11823       getThreadID(CGF, C->getBeginLoc()),
11824       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11825   llvm::FunctionCallee RTLFn;
11826   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11827     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11828                                                   OMPRTL___kmpc_doacross_post);
11829   } else {
11830     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11831     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11832                                                   OMPRTL___kmpc_doacross_wait);
11833   }
11834   CGF.EmitRuntimeCall(RTLFn, Args);
11835 }
11836 
11837 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11838                                llvm::FunctionCallee Callee,
11839                                ArrayRef<llvm::Value *> Args) const {
11840   assert(Loc.isValid() && "Outlined function call location must be valid.");
11841   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11842 
11843   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11844     if (Fn->doesNotThrow()) {
11845       CGF.EmitNounwindRuntimeCall(Fn, Args);
11846       return;
11847     }
11848   }
11849   CGF.EmitRuntimeCall(Callee, Args);
11850 }
11851 
11852 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11853     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11854     ArrayRef<llvm::Value *> Args) const {
11855   emitCall(CGF, Loc, OutlinedFn, Args);
11856 }
11857 
11858 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11859   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11860     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11861       HasEmittedDeclareTargetRegion = true;
11862 }
11863 
11864 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11865                                              const VarDecl *NativeParam,
11866                                              const VarDecl *TargetParam) const {
11867   return CGF.GetAddrOfLocalVar(NativeParam);
11868 }
11869 
/// Return the address to use for the local variable \p VD, or an invalid
/// address when no special handling applies.
///
/// Two situations are handled:
///  - the variable is registered in the untied-task data of the current
///    function, in which case the recorded addresses are used;
///  - the canonical declaration carries an OMPAllocateDeclAttr and
///    isAllocatableDecl(VD) holds, in which case storage is obtained with
///    `__kmpc_alloc` and a cleanup calling `__kmpc_free` is pushed.
///
/// \returns An invalid address for a null \p VD; the allocated (or recorded
/// "real") address for allocatable declarations; otherwise the recorded
/// untied address, which is invalid when the variable is not registered.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up the addresses recorded for VD in the untied-task data of the
  // current function, if there is any.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to the declared alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // The size is a compile-time constant; round it up directly.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    // Arguments of __kmpc_alloc: thread id, size, allocator.
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For variables registered in an untied task, also store the allocated
    // pointer into the recorded untied address.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // When run, it emits a call to the stored runtime function with the
    // thread id, the address cast to void* and the allocator value.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        // The allocator expression is re-emitted at the cleanup point.
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the recorded "real" untied address; otherwise use the freshly
    // allocated pointer with the declaration's alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // In an untied task region, emit the untied switch after the allocation.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  // No allocate attribute: return the recorded untied address, which is
  // invalid when VD is not registered.
  return UntiedAddr;
}
11972 
11973 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11974                                              const VarDecl *VD) const {
11975   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11976   if (It == FunctionToUntiedTaskStackMap.end())
11977     return false;
11978   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11979 }
11980 
11981 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11982     CodeGenModule &CGM, const OMPLoopDirective &S)
11983     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11984   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11985   if (!NeedToPush)
11986     return;
11987   NontemporalDeclsSet &DS =
11988       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11989   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11990     for (const Stmt *Ref : C->private_refs()) {
11991       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11992       const ValueDecl *VD;
11993       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11994         VD = DRE->getDecl();
11995       } else {
11996         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11997         assert((ME->isImplicitCXXThis() ||
11998                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11999                "Expected member of current class.");
12000         VD = ME->getMemberDecl();
12001       }
12002       DS.insert(VD);
12003     }
12004   }
12005 }
12006 
12007 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12008   if (!NeedToPush)
12009     return;
12010   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12011 }
12012 
12013 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12014     CodeGenFunction &CGF,
12015     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
12016                          std::pair<Address, Address>> &LocalVars)
12017     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12018   if (!NeedToPush)
12019     return;
12020   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12021       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12022   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12023 }
12024 
12025 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12026   if (!NeedToPush)
12027     return;
12028   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12029 }
12030 
12031 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12032   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12033 
12034   return llvm::any_of(
12035       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12036       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12037 }
12038 
12039 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12040     const OMPExecutableDirective &S,
12041     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12042     const {
12043   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12044   // Vars in target/task regions must be excluded completely.
12045   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12046       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12047     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12048     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12049     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12050     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12051       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12052         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12053     }
12054   }
12055   // Exclude vars in private clauses.
12056   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12057     for (const Expr *Ref : C->varlists()) {
12058       if (!Ref->getType()->isScalarType())
12059         continue;
12060       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12061       if (!DRE)
12062         continue;
12063       NeedToCheckForLPCs.insert(DRE->getDecl());
12064     }
12065   }
12066   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12067     for (const Expr *Ref : C->varlists()) {
12068       if (!Ref->getType()->isScalarType())
12069         continue;
12070       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12071       if (!DRE)
12072         continue;
12073       NeedToCheckForLPCs.insert(DRE->getDecl());
12074     }
12075   }
12076   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12077     for (const Expr *Ref : C->varlists()) {
12078       if (!Ref->getType()->isScalarType())
12079         continue;
12080       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12081       if (!DRE)
12082         continue;
12083       NeedToCheckForLPCs.insert(DRE->getDecl());
12084     }
12085   }
12086   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12087     for (const Expr *Ref : C->varlists()) {
12088       if (!Ref->getType()->isScalarType())
12089         continue;
12090       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12091       if (!DRE)
12092         continue;
12093       NeedToCheckForLPCs.insert(DRE->getDecl());
12094     }
12095   }
12096   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12097     for (const Expr *Ref : C->varlists()) {
12098       if (!Ref->getType()->isScalarType())
12099         continue;
12100       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12101       if (!DRE)
12102         continue;
12103       NeedToCheckForLPCs.insert(DRE->getDecl());
12104     }
12105   }
12106   for (const Decl *VD : NeedToCheckForLPCs) {
12107     for (const LastprivateConditionalData &Data :
12108          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12109       if (Data.DeclToUniqueName.count(VD) > 0) {
12110         if (!Data.Disabled)
12111           NeedToAddForLPCsAsDisabled.insert(VD);
12112         break;
12113       }
12114     }
12115   }
12116 }
12117 
12118 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12119     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12120     : CGM(CGF.CGM),
12121       Action((CGM.getLangOpts().OpenMP >= 50 &&
12122               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12123                            [](const OMPLastprivateClause *C) {
12124                              return C->getKind() ==
12125                                     OMPC_LASTPRIVATE_conditional;
12126                            }))
12127                  ? ActionToDo::PushAsLastprivateConditional
12128                  : ActionToDo::DoNotPush) {
12129   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12130   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12131     return;
12132   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12133          "Expected a push action.");
12134   LastprivateConditionalData &Data =
12135       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12136   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12137     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12138       continue;
12139 
12140     for (const Expr *Ref : C->varlists()) {
12141       Data.DeclToUniqueName.insert(std::make_pair(
12142           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12143           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12144     }
12145   }
12146   Data.IVLVal = IVLVal;
12147   Data.Fn = CGF.CurFn;
12148 }
12149 
12150 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12151     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12152     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12153   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12154   if (CGM.getLangOpts().OpenMP < 50)
12155     return;
12156   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12157   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12158   if (!NeedToAddForLPCsAsDisabled.empty()) {
12159     Action = ActionToDo::DisableLastprivateConditional;
12160     LastprivateConditionalData &Data =
12161         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12162     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12163       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12164     Data.Fn = CGF.CurFn;
12165     Data.Disabled = true;
12166   }
12167 }
12168 
/// Factory for the disabling form of the RAII object: returns an instance
/// built with the (CGF, S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12174 
12175 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12176   if (CGM.getLangOpts().OpenMP < 50)
12177     return;
12178   if (Action == ActionToDo::DisableLastprivateConditional) {
12179     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12180            "Expected list of disabled private vars.");
12181     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12182   }
12183   if (Action == ActionToDo::PushAsLastprivateConditional) {
12184     assert(
12185         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12186         "Expected list of lastprivate conditional vars.");
12187     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12188   }
12189 }
12190 
12191 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12192                                                         const VarDecl *VD) {
12193   ASTContext &C = CGM.getContext();
12194   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12195   if (I == LastprivateConditionalToTypes.end())
12196     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12197   QualType NewType;
12198   const FieldDecl *VDField;
12199   const FieldDecl *FiredField;
12200   LValue BaseLVal;
12201   auto VI = I->getSecond().find(VD);
12202   if (VI == I->getSecond().end()) {
12203     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12204     RD->startDefinition();
12205     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12206     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12207     RD->completeDefinition();
12208     NewType = C.getRecordType(RD);
12209     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12210     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12211     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12212   } else {
12213     NewType = std::get<0>(VI->getSecond());
12214     VDField = std::get<1>(VI->getSecond());
12215     FiredField = std::get<2>(VI->getSecond());
12216     BaseLVal = std::get<3>(VI->getSecond());
12217   }
12218   LValue FiredLVal =
12219       CGF.EmitLValueForField(BaseLVal, FiredField);
12220   CGF.EmitStoreOfScalar(
12221       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12222       FiredLVal);
12223   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12224 }
12225 
12226 namespace {
12227 /// Checks if the lastprivate conditional variable is referenced in LHS.
12228 class LastprivateConditionalRefChecker final
12229     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12230   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12231   const Expr *FoundE = nullptr;
12232   const Decl *FoundD = nullptr;
12233   StringRef UniqueDeclName;
12234   LValue IVLVal;
12235   llvm::Function *FoundFn = nullptr;
12236   SourceLocation Loc;
12237 
12238 public:
12239   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12240     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12241          llvm::reverse(LPM)) {
12242       auto It = D.DeclToUniqueName.find(E->getDecl());
12243       if (It == D.DeclToUniqueName.end())
12244         continue;
12245       if (D.Disabled)
12246         return false;
12247       FoundE = E;
12248       FoundD = E->getDecl()->getCanonicalDecl();
12249       UniqueDeclName = It->second;
12250       IVLVal = D.IVLVal;
12251       FoundFn = D.Fn;
12252       break;
12253     }
12254     return FoundE == E;
12255   }
12256   bool VisitMemberExpr(const MemberExpr *E) {
12257     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12258       return false;
12259     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12260          llvm::reverse(LPM)) {
12261       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12262       if (It == D.DeclToUniqueName.end())
12263         continue;
12264       if (D.Disabled)
12265         return false;
12266       FoundE = E;
12267       FoundD = E->getMemberDecl()->getCanonicalDecl();
12268       UniqueDeclName = It->second;
12269       IVLVal = D.IVLVal;
12270       FoundFn = D.Fn;
12271       break;
12272     }
12273     return FoundE == E;
12274   }
12275   bool VisitStmt(const Stmt *S) {
12276     for (const Stmt *Child : S->children()) {
12277       if (!Child)
12278         continue;
12279       if (const auto *E = dyn_cast<Expr>(Child))
12280         if (!E->isGLValue())
12281           continue;
12282       if (Visit(Child))
12283         return true;
12284     }
12285     return false;
12286   }
12287   explicit LastprivateConditionalRefChecker(
12288       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12289       : LPM(LPM) {}
12290   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12291   getFoundData() const {
12292     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12293   }
12294 };
12295 } // namespace
12296 
/// Emits the conditional update of the shared copies that track a lastprivate
/// conditional variable:
/// \code
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// \endcode
/// \param IVLVal         lvalue of the loop iteration variable (iv).
/// \param UniqueDeclName name of the internal variable holding the last value;
///                       the last-iv variable name is derived from it with an
///                       "iv" suffix, and it also names the critical region.
/// \param LVal           lvalue of the private copy (priv_a).
/// \param Loc            location used for the emitted loads and the critical
///                       region.
/// The update is wrapped in a critical region unless OpenMPSimd is enabled.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Loaded once here, before the region body below is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick a signed or unsigned comparison to match the type of iv.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded right
    // away and the branch has already been emitted above. If
    // ApplyDebugLocation is a scoped guard, this statement presumably has no
    // lasting effect - confirm the intent.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12383 
12384 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12385                                                          const Expr *LHS) {
12386   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12387     return;
12388   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12389   if (!Checker.Visit(LHS))
12390     return;
12391   const Expr *FoundE;
12392   const Decl *FoundD;
12393   StringRef UniqueDeclName;
12394   LValue IVLVal;
12395   llvm::Function *FoundFn;
12396   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12397       Checker.getFoundData();
12398   if (FoundFn != CGF.CurFn) {
12399     // Special codegen for inner parallel regions.
12400     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12401     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12402     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12403            "Lastprivate conditional is not found in outer region.");
12404     QualType StructTy = std::get<0>(It->getSecond());
12405     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12406     LValue PrivLVal = CGF.EmitLValue(FoundE);
12407     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12408         PrivLVal.getAddress(CGF),
12409         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12410     LValue BaseLVal =
12411         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12412     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12413     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12414                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12415                         FiredLVal, llvm::AtomicOrdering::Unordered,
12416                         /*IsVolatile=*/true, /*isInit=*/false);
12417     return;
12418   }
12419 
12420   // Private address of the lastprivate conditional in the current context.
12421   // priv_a
12422   LValue LVal = CGF.EmitLValue(FoundE);
12423   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12424                                    FoundE->getExprLoc());
12425 }
12426 
12427 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12428     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12429     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12430   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12431     return;
12432   auto Range = llvm::reverse(LastprivateConditionalStack);
12433   auto It = llvm::find_if(
12434       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12435   if (It == Range.end() || It->Fn != CGF.CurFn)
12436     return;
12437   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12438   assert(LPCI != LastprivateConditionalToTypes.end() &&
12439          "Lastprivates must be registered already.");
12440   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12441   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12442   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12443   for (const auto &Pair : It->DeclToUniqueName) {
12444     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12445     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12446       continue;
12447     auto I = LPCI->getSecond().find(Pair.first);
12448     assert(I != LPCI->getSecond().end() &&
12449            "Lastprivate must be rehistered already.");
12450     // bool Cmp = priv_a.Fired != 0;
12451     LValue BaseLVal = std::get<3>(I->getSecond());
12452     LValue FiredLVal =
12453         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12454     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12455     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12456     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12457     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12458     // if (Cmp) {
12459     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12460     CGF.EmitBlock(ThenBB);
12461     Address Addr = CGF.GetAddrOfLocalVar(VD);
12462     LValue LVal;
12463     if (VD->getType()->isReferenceType())
12464       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12465                                            AlignmentSource::Decl);
12466     else
12467       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12468                                 AlignmentSource::Decl);
12469     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12470                                      D.getBeginLoc());
12471     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12472     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12473     // }
12474   }
12475 }
12476 
12477 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12478     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12479     SourceLocation Loc) {
12480   if (CGF.getLangOpts().OpenMP < 50)
12481     return;
12482   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12483   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12484          "Unknown lastprivate conditional variable.");
12485   StringRef UniqueName = It->second;
12486   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12487   // The variable was not updated in the region - exit.
12488   if (!GV)
12489     return;
12490   LValue LPLVal = CGF.MakeAddrLValue(
12491       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12492   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12493   CGF.EmitStoreOfScalar(Res, PrivLVal);
12494 }
12495 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12501 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12507 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable. NumberOfParts is left untouched.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12515 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12523 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12530 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12536 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12541 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12547 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12555 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12562 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12570 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12577 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12583 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12589 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12596 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12602 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12610 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12616 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12622 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12629 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12635 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12640 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12647 
// Not supported by the SIMD-only runtime: must never be called, the body
// unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12656 
12657 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12658     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12659     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12660     const Expr *IfCond, const OMPTaskDataTy &Data) {
12661   llvm_unreachable("Not supported in SIMD-only mode");
12662 }
12663 
12664 void CGOpenMPSIMDRuntime::emitReduction(
12665     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12666     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12667     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12668   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12669   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12670                                  ReductionOps, Options);
12671 }
12672 
12673 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12674     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12675     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12676   llvm_unreachable("Not supported in SIMD-only mode");
12677 }
12678 
12679 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12680                                                 SourceLocation Loc,
12681                                                 bool IsWorksharingReduction) {
12682   llvm_unreachable("Not supported in SIMD-only mode");
12683 }
12684 
12685 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12686                                                   SourceLocation Loc,
12687                                                   ReductionCodeGen &RCG,
12688                                                   unsigned N) {
12689   llvm_unreachable("Not supported in SIMD-only mode");
12690 }
12691 
12692 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12693                                                   SourceLocation Loc,
12694                                                   llvm::Value *ReductionsPtr,
12695                                                   LValue SharedLVal) {
12696   llvm_unreachable("Not supported in SIMD-only mode");
12697 }
12698 
12699 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12700                                            SourceLocation Loc) {
12701   llvm_unreachable("Not supported in SIMD-only mode");
12702 }
12703 
12704 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12705     CodeGenFunction &CGF, SourceLocation Loc,
12706     OpenMPDirectiveKind CancelRegion) {
12707   llvm_unreachable("Not supported in SIMD-only mode");
12708 }
12709 
12710 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12711                                          SourceLocation Loc, const Expr *IfCond,
12712                                          OpenMPDirectiveKind CancelRegion) {
12713   llvm_unreachable("Not supported in SIMD-only mode");
12714 }
12715 
12716 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12717     const OMPExecutableDirective &D, StringRef ParentName,
12718     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12719     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12720   llvm_unreachable("Not supported in SIMD-only mode");
12721 }
12722 
12723 void CGOpenMPSIMDRuntime::emitTargetCall(
12724     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12725     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12726     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12727     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12728                                      const OMPLoopDirective &D)>
12729         SizeEmitter) {
12730   llvm_unreachable("Not supported in SIMD-only mode");
12731 }
12732 
12733 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12734   llvm_unreachable("Not supported in SIMD-only mode");
12735 }
12736 
12737 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12738   llvm_unreachable("Not supported in SIMD-only mode");
12739 }
12740 
12741 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12742   return false;
12743 }
12744 
12745 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12746                                         const OMPExecutableDirective &D,
12747                                         SourceLocation Loc,
12748                                         llvm::Function *OutlinedFn,
12749                                         ArrayRef<llvm::Value *> CapturedVars) {
12750   llvm_unreachable("Not supported in SIMD-only mode");
12751 }
12752 
12753 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12754                                              const Expr *NumTeams,
12755                                              const Expr *ThreadLimit,
12756                                              SourceLocation Loc) {
12757   llvm_unreachable("Not supported in SIMD-only mode");
12758 }
12759 
12760 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12761     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12762     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12763   llvm_unreachable("Not supported in SIMD-only mode");
12764 }
12765 
12766 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12767     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12768     const Expr *Device) {
12769   llvm_unreachable("Not supported in SIMD-only mode");
12770 }
12771 
12772 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12773                                            const OMPLoopDirective &D,
12774                                            ArrayRef<Expr *> NumIterations) {
12775   llvm_unreachable("Not supported in SIMD-only mode");
12776 }
12777 
12778 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12779                                               const OMPDependClause *C) {
12780   llvm_unreachable("Not supported in SIMD-only mode");
12781 }
12782 
12783 const VarDecl *
12784 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12785                                         const VarDecl *NativeParam) const {
12786   llvm_unreachable("Not supported in SIMD-only mode");
12787 }
12788 
12789 Address
12790 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12791                                          const VarDecl *NativeParam,
12792                                          const VarDecl *TargetParam) const {
12793   llvm_unreachable("Not supported in SIMD-only mode");
12794 }
12795