1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region with an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no captured statement of its own (used for
  /// regions that reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; a no-op by default,
  /// overridden by regions that support task switching.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined parallel/task, inlined, or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag supplied at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any CR_OpenMP capture info is a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
108 
109 /// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable/parameter holding the global thread id;
  /// must be non-null for outlined parallel regions.
  /// \param HelperName Name used for the generated capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only parallel-outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated capture helper function.
  StringRef HelperName;
};
141 
142 /// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the task-switching machinery for 'untied'
  /// tasks: a switch over the task's part id dispatches execution to the
  /// point where the task was last suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the task's current part id.
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination of the switch branches to the function exit.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes execution at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a suspension point: store the id of the next part, run the
    /// untied code-gen sequence, and register a new switch case that resumes
    /// execution right after this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The current case count doubles as the id of the next part.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts registered so far (one switch case per part).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate to the action so untied tasks get a fresh scheduling point.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: matches only task-outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Capture info of the enclosing region (may be null or a
  /// non-OpenMP capture info); most queries below delegate to it.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Forward the context value to the enclosing OpenMP region, if any.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Field capturing 'this' in the enclosing OpenMP region, if any.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Note: queries the raw OldCSI (not the OpenMP-only OuterRegionInfo), so
    // a non-OpenMP enclosing capture info can still supply the name.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-task switching to the enclosing OpenMP region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Capture info of the enclosing region, restored when this region ends.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: matches only inlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
313 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-provided, application-unique name for the
  /// target region helper.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region.
  StringRef HelperName;
};
342 
/// Placeholder code-gen callback for regions created only to emit captured
/// expressions (see CGOpenMPInnerExprInfo below); such regions never emit a
/// statement body, so reaching this is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda/block capture state, swapped out while the region is
  /// active when NoInheritance is set and restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// Whether lambda/block capture info is hidden from the inlined region.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the lambda/block capture maps; the destructor restores them.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
448 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Intentionally the same value as OMP_IDENT_BARRIER_IMPL (see kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
477 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Sentinel device IDs understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
503 
504 /// Describes ident structure that describes a source location.
505 /// All descriptions are taken from
506 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
507 /// Original structure:
508 /// typedef struct ident {
509 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
510 ///                                  see above  */
511 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
512 ///                                  KMP_IDENT_KMPC identifies this union
513 ///                                  member  */
514 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
515 ///                                  see above */
516 ///#if USE_ITT_BUILD
517 ///                            /*  but currently used for storing
518 ///                                region-specific ITT */
519 ///                            /*  contextual information. */
520 ///#endif /* USE_ITT_BUILD */
521 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
522 ///                                 C++  */
523 ///    char const *psource;    /**< String describing the source location.
524 ///                            The string is composed of semi-colon separated
525 //                             fields which describe the source file,
526 ///                            the function and a pair of line numbers that
527 ///                            delimit the construct.
528 ///                             */
529 /// } ident_t;
/// Field indices must stay in the declaration order of the ident_t struct
/// shown above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
544 
545 /// Schedule types for 'omp for' loops (these enumerators are taken from
546 /// the enum sched_type in kmp.h).
/// Numeric values mirror enum sched_type in the OpenMP runtime's kmp.h and
/// must not be renumbered.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
576 
577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
578 /// region.
579 class CleanupTy final : public EHScopeStack::Cleanup {
580   PrePostActionTy *Action;
581 
582 public:
583   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
584   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
585     if (!CGF.HaveInsertPoint())
586       return;
587     Action->Exit(CGF);
588   }
589 };
590 
591 } // anonymous namespace
592 
/// Invoke the stored code-gen callback inside a fresh cleanups scope.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Push the action's Exit() as a cleanup first so it runs on both normal
    // and exceptional paths out of the callback.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No registered action: hand the callback a throwaway default action.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
603 
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
606 static const OMPDeclareReductionDecl *
607 getReductionInit(const Expr *ReductionOp) {
608   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
609     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
610       if (const auto *DRE =
611               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
612         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
613           return DRD;
614   return nullptr;
615 }
616 
/// Initialize \p Private either by calling the UDR's initializer (when the
/// declare-reduction decl has one) or with the null value of \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Emit the initializer call with its first argument bound to Private and
    // its second argument bound to Original.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Map the opaque callee to the generated initializer function and emit
    // the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: copy from a private constant global holding the
    // null value of the type.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied straight into the private storage; no scalar
      // rvalue is formed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded rvalue into the private storage.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
672 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true if the user-defined reduction
/// initializer should be emitted instead of the default initializer.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration used for initialization, if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // SrcAddr is only consulted when a UDR needs the original element.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups to a single iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name reuses "dest.element" for the source
    // pointer; cosmetic only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Latch edge of the destination PHI comes from the current insert block.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
761 
762 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
763   return CGF.EmitOMPSharedLValue(E);
764 }
765 
766 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
767                                             const Expr *E) {
768   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
769     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
770   return LValue();
771 }
772 
773 void ReductionCodeGen::emitAggregateInitialization(
774     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
775     const OMPDeclareReductionDecl *DRD) {
776   // Emit VarDecl with copy init for arrays.
777   // Get the address of the original variable captured in current
778   // captured region.
779   const auto *PrivateVD =
780       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
781   bool EmitDeclareReductionInit =
782       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
783   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
784                        EmitDeclareReductionInit,
785                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
786                                                 : PrivateVD->getInit(),
787                        DRD, SharedLVal.getAddress(CGF));
788 }
789 
790 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
791                                    ArrayRef<const Expr *> Origs,
792                                    ArrayRef<const Expr *> Privates,
793                                    ArrayRef<const Expr *> ReductionOps) {
794   ClausesData.reserve(Shareds.size());
795   SharedAddresses.reserve(Shareds.size());
796   Sizes.reserve(Shareds.size());
797   BaseDecls.reserve(Shareds.size());
798   const auto *IOrig = Origs.begin();
799   const auto *IPriv = Privates.begin();
800   const auto *IRed = ReductionOps.begin();
801   for (const Expr *Ref : Shareds) {
802     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
803     std::advance(IOrig, 1);
804     std::advance(IPriv, 1);
805     std::advance(IRed, 1);
806   }
807 }
808 
809 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
810   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
811          "Number of generated lvalues must be exactly N.");
812   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
813   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
814   SharedAddresses.emplace_back(First, Second);
815   if (ClausesData[N].Shared == ClausesData[N].Ref) {
816     OrigAddresses.emplace_back(First, Second);
817   } else {
818     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
819     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
820     OrigAddresses.emplace_back(First, Second);
821   }
822 }
823 
// Computes the size (in chars and, for variably sized items, in elements) of
// the N-th reduction item and records it in Sizes. For variably modified
// private types, also binds the VLA size expression to the computed element
// count and emits the type.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: size is fully determined by the type; no runtime
    // element count is needed (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1, computed from the pointer
    // difference of the section's bound addresses.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Other variably modified types: total size from the type, element count
    // by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the computed element count while the
  // variably modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
860 
// Re-emits the variably modified private type of the N-th reduction item with
// the externally provided element count \p Size (e.g. in an outlined function
// where the size was passed in). No-op for constant-sized items, where Size
// must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
879 
// Emits the initializer for the N-th private reduction copy at PrivateAddr.
// Chooses between (a) aggregate initialization for array types, (b) the
// user-defined reduction initializer, and (c) the private variable's own
// default initializer. DefaultInit is invoked first where appropriate and may
// itself fully handle the initialization (it returns true in that case).
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the IR types of their respective source-level
  // types before emitting any initialization through them.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays: element-wise initialization; run DefaultInit first when a
    // declare-reduction initializer will be used.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's non-trivial default initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
913 
914 bool ReductionCodeGen::needCleanups(unsigned N) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   return DTorKind != QualType::DK_none;
920 }
921 
922 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
923                                     Address PrivateAddr) {
924   const auto *PrivateVD =
925       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
926   QualType PrivateType = PrivateVD->getType();
927   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
928   if (needCleanups(N)) {
929     PrivateAddr = CGF.Builder.CreateElementBitCast(
930         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
931     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
932   }
933 }
934 
// Dereferences pointer/reference layers of BaseLV (whose type is BaseTy)
// until the pointee type matches ElTy, then returns an lvalue for that
// address cast to ElTy's IR type. Used to reach the start of the data a
// reduction item refers to.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one pointer or reference level per iteration.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Reinterpret the final address as ElTy, keeping base/TBAA info.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
954 
// Inverse of loadToBegin: rebuilds a chain of temporaries so that Addr (the
// adjusted private address, of element type ElTy) can be accessed through a
// value of type BaseTy. Each pointer/reference level gets a memory temporary
// holding the address of the next level; the outermost temporary is returned.
// If BaseTy needs no indirection, Addr is returned directly (cast to
// BaseLVType).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Allocate one temporary per indirection level and chain it into the
    // previous level's storage.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temporary.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
982 
983 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
984   const VarDecl *OrigVD = nullptr;
985   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
986     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
987     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
988       Base = TempOASE->getBase()->IgnoreParenImpCasts();
989     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
990       Base = TempASE->getBase()->IgnoreParenImpCasts();
991     DE = cast<DeclRefExpr>(Base);
992     OrigVD = cast<VarDecl>(DE->getDecl());
993   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
994     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
995     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
996       Base = TempASE->getBase()->IgnoreParenImpCasts();
997     DE = cast<DeclRefExpr>(Base);
998     OrigVD = cast<VarDecl>(DE->getDecl());
999   }
1000   return OrigVD;
1001 }
1002 
// Adjusts the private copy's address for reduction item N so that it mirrors
// the offset of the shared item within its base variable (needed for array
// sections/subscripts, where the reduction item is not the whole variable).
// Also records the base declaration in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Address of the first element the base variable refers to.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base start relative to the shared item;
    // applying it to the private pointer reproduces the same layout.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer so it is accessible through the base's type.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1028 
1029 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1030   const OMPDeclareReductionDecl *DRD =
1031       getReductionInit(ClausesData[N].ReductionOp);
1032   return DRD && DRD->getInitializer();
1033 }
1034 
1035 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1036   return CGF.EmitLoadOfPointerLValue(
1037       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1038       getThreadIDVariable()->getType()->castAs<PointerType>());
1039 }
1040 
// Emits the body of an OpenMP structured block inside a terminate scope, so
// that exceptions cannot escape the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1055 
1056 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1057     CodeGenFunction &CGF) {
1058   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1059                             getThreadIDVariable()->getType(),
1060                             AlignmentSource::Decl);
1061 }
1062 
1063 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1064                                        QualType FieldTy) {
1065   auto *Field = FieldDecl::Create(
1066       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1067       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1068       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1069   Field->setAccess(AS_public);
1070   DC->addDecl(Field);
1071   return Field;
1072 }
1073 
// Sets up the runtime helper: the kmp_critical_name type (array of 8 i32, as
// required by the KMP runtime ABI), the OpenMPIRBuilder, and any offload
// entry metadata carried over from a host IR file.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1084 
1085 void CGOpenMPRuntime::clear() {
1086   InternalVars.clear();
1087   // Clean non-target variable declarations possibly used only in debug info.
1088   for (const auto &Data : EmittedNonTargetVariables) {
1089     if (!Data.getValue().pointsToAliveValue())
1090       continue;
1091     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1092     if (!GV)
1093       continue;
1094     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1095       continue;
1096     GV->eraseFromParent();
1097   }
1098 }
1099 
1100 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1101   SmallString<128> Buffer;
1102   llvm::raw_svector_ostream OS(Buffer);
1103   StringRef Sep = FirstSeparator;
1104   for (StringRef Part : Parts) {
1105     OS << Sep << Part;
1106     Sep = Separator;
1107   }
1108   return std::string(OS.str());
1109 }
1110 
// Emits an internal helper function for a declare-reduction construct:
// either the combiner ".omp_combiner.(Ty *omp_out, Ty *omp_in)" or the
// initializer ".omp_initializer.(Ty *omp_priv, Ty *omp_orig)". The In/Out
// VarDecls from the declaration are privatized to the dereferenced
// parameters so CombinerInitializer can be emitted unchanged.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Helpers are tiny; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without an init clause, run the private copy's own
  // non-trivial default initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1167 
// Emits (once) the combiner and, if present, the initializer functions for a
// declare-reduction construct and caches them in UDRMap. When emitted from
// within a function, the declaration is also associated with that function in
// FunctionUDRMap so it can be cleaned up with it.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers emit the init expression; direct-init
    // styles are handled via the private copy's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1193 
1194 std::pair<llvm::Function *, llvm::Function *>
1195 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1196   auto I = UDRMap.find(D);
1197   if (I != UDRMap.end())
1198     return I->second;
1199   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1200   return UDRMap.lookup(D);
1201 }
1202 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for an OMPD_parallel region onto the
  // OpenMPIRBuilder's stack (if a builder is in use); the destructor pops it.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when the OpenMPIRBuilder is not in use; then both push and pop are
  // no-ops.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1248 
// Outlines the captured statement CS of a 'parallel' or 'teams' directive
// into a function named via OutlinedHelperName, taking the thread id as its
// first parameter. Cancellation support is enabled when the directive kind
// carries a 'cancel' construct.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive (in any of its parallel-containing
  // forms) has a 'cancel' construct inside.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1285 
1286 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290   return emitParallelOrTeamsOutlinedFunction(
1291       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292 }
1293 
1294 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298   return emitParallelOrTeamsOutlinedFunction(
1299       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300 }
1301 
// Outlines the body of a 'task' (or taskloop) directive. For untied tasks an
// action is installed that re-enqueues the task via __kmpc_omp_task at each
// scheduling point; NumberOfParts is then set to the number of task parts
// produced.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen for re-enqueuing an untied task: call __kmpc_omp_task with the
  // current task descriptor.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the task-related directive has a 'cancel' inside.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1348 
1349 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350                              const RecordDecl *RD, const CGRecordLayout &RL,
1351                              ArrayRef<llvm::Constant *> Data) {
1352   llvm::StructType *StructTy = RL.getLLVMType();
1353   unsigned PrevIdx = 0;
1354   ConstantInitBuilder CIBuilder(CGM);
1355   auto DI = Data.begin();
1356   for (const FieldDecl *FD : RD->fields()) {
1357     unsigned Idx = RL.getLLVMFieldNo(FD);
1358     // Fill the alignment.
1359     for (unsigned I = PrevIdx; I < Idx; ++I)
1360       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361     PrevIdx = Idx + 1;
1362     Fields.add(*DI);
1363     ++DI;
1364   }
1365 }
1366 
1367 template <class... As>
1368 static llvm::GlobalVariable *
1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1370                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1371                    As &&... Args) {
1372   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1373   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1374   ConstantInitBuilder CIBuilder(CGM);
1375   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1376   buildStructValue(Fields, CGM, RD, RL, Data);
1377   return Fields.finishAndCreateGlobal(
1378       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1379       std::forward<As>(Args)...);
1380 }
1381 
1382 template <typename T>
1383 static void
1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385                                          ArrayRef<llvm::Constant *> Data,
1386                                          T &Parent) {
1387   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390   buildStructValue(Fields, CGM, RD, RL, Data);
1391   Fields.finishAndAddTo(Parent);
1392 }
1393 
// Creates a dummy instruction (a no-op bitcast named "svcpt") that serves as
// the insertion point for service calls (ident/thread-id setup) in the
// current function. Placed either at the builder's current point or right
// after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Keep service calls in the entry block, after the allocas.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1409 
1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412   if (Elem.second.ServiceInsertPt) {
1413     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414     Elem.second.ServiceInsertPt = nullptr;
1415     Ptr->eraseFromParent();
1416   }
1417 }
1418 
1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420                                                   SourceLocation Loc,
1421                                                   SmallString<128> &Buffer) {
1422   llvm::raw_svector_ostream OS(Buffer);
1423   // Build debug location
1424   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425   OS << ";" << PLoc.getFilename() << ";";
1426   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427     OS << FD->getQualifiedNameAsString();
1428   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429   return OS.str();
1430 }
1431 
1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433                                                  SourceLocation Loc,
1434                                                  unsigned Flags) {
1435   llvm::Constant *SrcLocStr;
1436   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437       Loc.isInvalid()) {
1438     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439   } else {
1440     std::string FunctionName = "";
1441     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442       FunctionName = FD->getQualifiedNameAsString();
1443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444     const char *FileName = PLoc.getFilename();
1445     unsigned Line = PLoc.getLine();
1446     unsigned Column = PLoc.getColumn();
1447     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448                                                 Line, Column);
1449   }
1450   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452                                      Reserved2Flags);
1453 }
1454 
/// Return the OpenMP global thread id for the current function. The value is
/// cached per function; when unavailable from an outlined region's thread-id
/// argument, a call to __kmpc_global_thread_num is emitted at the service
/// insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the argument when C++ EH cannot interfere, or the load
      // happens in (or the variable lives in) the entry/current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point so the cached value is
  // available throughout the function; restore the builder position after.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
/// Return the LLVM pointer type for ident_t, as maintained by the
/// OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1547 
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549   if (!Kmpc_MicroTy) {
1550     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554   }
1555   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563                                             : "__kmpc_for_static_init_4u")
1564                                 : (IVSigned ? "__kmpc_for_static_init_8"
1565                                             : "__kmpc_for_static_init_8u");
1566   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568   llvm::Type *TypeParams[] = {
1569     getIdentTyPointerTy(),                     // loc
1570     CGM.Int32Ty,                               // tid
1571     CGM.Int32Ty,                               // schedtype
1572     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573     PtrTy,                                     // p_lower
1574     PtrTy,                                     // p_upper
1575     PtrTy,                                     // p_stride
1576     ITy,                                       // incr
1577     ITy                                        // chunk
1578   };
1579   auto *FnTy =
1580       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581   return CGM.CreateRuntimeFunction(FnTy, Name);
1582 }
1583 
1584 llvm::FunctionCallee
1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586   assert((IVSize == 32 || IVSize == 64) &&
1587          "IV size is not compatible with the omp runtime");
1588   StringRef Name =
1589       IVSize == 32
1590           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594                                CGM.Int32Ty,           // tid
1595                                CGM.Int32Ty,           // schedtype
1596                                ITy,                   // lower
1597                                ITy,                   // upper
1598                                ITy,                   // stride
1599                                ITy                    // chunk
1600   };
1601   auto *FnTy =
1602       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603   return CGM.CreateRuntimeFunction(FnTy, Name);
1604 }
1605 
1606 llvm::FunctionCallee
1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608   assert((IVSize == 32 || IVSize == 64) &&
1609          "IV size is not compatible with the omp runtime");
1610   StringRef Name =
1611       IVSize == 32
1612           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614   llvm::Type *TypeParams[] = {
1615       getIdentTyPointerTy(), // loc
1616       CGM.Int32Ty,           // tid
1617   };
1618   auto *FnTy =
1619       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620   return CGM.CreateRuntimeFunction(FnTy, Name);
1621 }
1622 
1623 llvm::FunctionCallee
1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625   assert((IVSize == 32 || IVSize == 64) &&
1626          "IV size is not compatible with the omp runtime");
1627   StringRef Name =
1628       IVSize == 32
1629           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633   llvm::Type *TypeParams[] = {
1634     getIdentTyPointerTy(),                     // loc
1635     CGM.Int32Ty,                               // tid
1636     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637     PtrTy,                                     // p_lower
1638     PtrTy,                                     // p_upper
1639     PtrTy                                      // p_stride
1640   };
1641   auto *FnTy =
1642       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643   return CGM.CreateRuntimeFunction(FnTy, Name);
1644 }
1645 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // The presumed filename may come from a #line directive and not exist on
    // disk; retry with line directives ignored before diagnosing an error.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  // Identify the entry by the file's device/file IDs plus the presumed line.
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1675 
/// For a declare-target variable that must be referenced indirectly ('link'
/// clause, or 'to' when unified shared memory is required), return the
/// address of the generated "..._decl_tgt_ref_ptr" indirection pointer;
/// otherwise return an invalid address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal symbols get a file-id suffix so the pointer name stays
        // unique across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Lazily create the pointer global and register it with the target
      // offload machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only on the host is the pointer initialized with the original
      // variable's address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1714 
1715 llvm::Constant *
1716 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1717   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1718          !CGM.getContext().getTargetInfo().isTLSSupported());
1719   // Lookup the entry, lazily creating it if necessary.
1720   std::string Suffix = getName({"cache", ""});
1721   return getOrCreateInternalVariable(
1722       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1723 }
1724 
/// Return the address of the thread-specific copy of threadprivate variable
/// \p VD. With native TLS support the variable's own address is already
/// thread-local; otherwise emit a __kmpc_threadprivate_cached call to look
/// up (or create) the copy.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Args: loc, gtid, original address (i8*), size in bytes, cache global.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}
1745 
/// Register the constructor/copy-constructor/destructor for the
/// threadprivate variable at \p VDAddr with the OpenMP runtime via
/// __kmpc_threadprivate_register, after making sure the runtime is
/// initialized.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
1765 
/// Emit the runtime registration for a threadprivate variable definition
/// when native TLS cannot be used. Builds, as needed:
///  - a "__kmpc_global_ctor_" helper re-running the initializer on the
///    threadprivate copy (C++ only, when \p PerformInit),
///  - a "__kmpc_global_dtor_" helper destroying the copy, and
///  - either a standalone "__omp_threadprivate_init_" function (returned
///    when \p CGF is null) or inline registration code in \p CGF; both call
///    emitThreadPrivateVarInit. Returns nullptr when nothing is needed.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Emit the machinery only once per variable, for its definition.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The helper receives the address of the threadprivate copy as void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Run the initializer on the destination, then return the pointer back
      // to the runtime.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The helper receives the address of the threadprivate copy as void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null when no constructor helper was generated.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Pass a typed null when no destructor helper was generated.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function context: wrap the registration in a standalone global
      // init function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1885 
/// Emit and register the offload ctor/dtor entries for a declare-target
/// variable definition. On the device real helper functions are generated
/// ("<prefix>_ctor"/"<prefix>_dtor"); on the host only placeholder globals
/// are created so both sides register matching entry names. Returns the
/// value of LangOpts.OpenMPIsDevice.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading is configured at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Variables accessed through the indirection pointer ('link', or 'to'
  // under unified shared memory) are handled elsewhere.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the helper alive through optimization/linking.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private placeholder global stands in for the ctor so
      // the entry can still be registered under the same name.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the helper alive through optimization/linking.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private placeholder global stands in for the dtor so
      // the entry can still be registered under the same name.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2000 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable named \p Name of type \p VarType. With native TLS support the
/// backing global is simply marked thread-local; otherwise a
/// __kmpc_threadprivate_cached call is emitted, like for user threadprivate
/// variables.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Native TLS: mark the global thread-local and return it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Args: loc, gtid, original address, size in bytes, per-variable cache.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns i8*; cast back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2031 
2032 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2033                                    const RegionCodeGenTy &ThenGen,
2034                                    const RegionCodeGenTy &ElseGen) {
2035   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2036 
2037   // If the condition constant folds and can be elided, try to avoid emitting
2038   // the condition and the dead arm of the if/else.
2039   bool CondConstant;
2040   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2041     if (CondConstant)
2042       ThenGen(CGF);
2043     else
2044       ElseGen(CGF);
2045     return;
2046   }
2047 
2048   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2049   // emit the conditional branch.
2050   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2051   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2052   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2053   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2054 
2055   // Emit the 'then' code.
2056   CGF.EmitBlock(ThenBlock);
2057   ThenGen(CGF);
2058   CGF.EmitBranch(ContBlock);
2059   // Emit the 'else' code if present.
2060   // There is no need to emit line number for unconditional branch.
2061   (void)ApplyDebugLocation::CreateEmpty(CGF);
2062   CGF.EmitBlock(ElseBlock);
2063   ElseGen(CGF);
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBranch(ContBlock);
2067   // Emit the continuation block for code after the if.
2068   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2069 }
2070 
/// Emit code for a 'parallel' directive: a call to
/// __kmpc_fork_call(loc, n, microtask, captured vars...) in the common case,
/// or — when \p IfCond is present and may be false — an if/else that either
/// forks or runs the outlined function serialized between
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: unconditionally emit the forked version.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2140 
2141 // If we're inside an (outlined) parallel region, use the region info's
2142 // thread-ID variable (it is passed in a first argument of the outlined function
2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2144 // regular serial code region, get thread ID by calling kmp_int32
2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2146 // return the address of that temp.
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2148                                              SourceLocation Loc) {
2149   if (auto *OMPRegionInfo =
2150           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2151     if (OMPRegionInfo->getThreadIDVariable())
2152       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2153 
2154   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2155   QualType Int32Ty =
2156       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2157   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2158   CGF.EmitStoreOfScalar(ThreadID,
2159                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2160 
2161   return ThreadIDTemp;
2162 }
2163 
2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2165     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2166   SmallString<256> Buffer;
2167   llvm::raw_svector_ostream Out(Buffer);
2168   Out << Name;
2169   StringRef RuntimeName = Out.str();
2170   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2171   if (Elem.second) {
2172     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2173            "OMP internal variable has different type than requested");
2174     return &*Elem.second;
2175   }
2176 
2177   return Elem.second = new llvm::GlobalVariable(
2178              CGM.getModule(), Ty, /*IsConstant*/ false,
2179              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2180              Elem.first(), /*InsertBefore=*/nullptr,
2181              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2182 }
2183 
2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2185   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2186   std::string Name = getName({Prefix, "var"});
2187   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2188 }
2189 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to \p EnterCallee with \p EnterArgs, Exit() emits a
/// call to \p ExitCallee with \p ExitArgs. When \p Conditional is set, the
/// region body is guarded: it only runs when the enter call returns non-zero,
/// and the caller must invoke Done() afterwards to close the guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block created by Enter() in the conditional case; stays
  // null otherwise.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Skip the body when the runtime enter call returned 0.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the guard opened by Enter(). Only valid after a Conditional
  // Enter() — ContBlock is null otherwise.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2228 
/// Emit a named 'critical' region guarded by the per-name runtime lock,
/// optionally forwarding a 'hint' clause value to the runtime.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The _with_hint entry point takes an extra unsigned 32-bit hint operand.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  // The action wraps the region body in the enter/exit runtime calls.
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2258 
/// Emit a 'master' region: the body runs only on the thread for which
/// __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
}
2281 
/// Emit a 'masked' region: the body runs only on threads for which
/// __kmpc_masked(ident_t *, gtid, filter) returns non-zero. A missing
/// 'filter' clause defaults to thread 0 (i.e. 'master' semantics).
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call does not take the filter value.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
}
2310 
2311 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2312                                         SourceLocation Loc) {
2313   if (!CGF.HaveInsertPoint())
2314     return;
2315   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2316     OMPBuilder.createTaskyield(CGF.Builder);
2317   } else {
2318     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2319     llvm::Value *Args[] = {
2320         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2321         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2322     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2323                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2324                         Args);
2325   }
2326 
2327   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2328     Region->emitUntiedSwitch(CGF);
2329 }
2330 
/// Emit a 'taskgroup' region bracketed by the runtime enter/exit calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2350 
2351 /// Given an array of pointers to variables, project the address of a
2352 /// given variable.
2353 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2354                                       unsigned Index, const VarDecl *Var) {
2355   // Pull out the pointer to the variable.
2356   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2357   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2358 
2359   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2360   Addr = CGF.Builder.CreateElementBitCast(
2361       Addr, CGF.ConvertTypeForMem(Var->getType()));
2362   return Addr;
2363 }
2364 
/// Emit the helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of void* (of type \p ArgsType) holding addresses
/// of the copyprivate variables. For each variable I the assignment
/// \p AssignmentOps[I] is performed with \p DestExprs[I] bound to the LHS
/// element and \p SrcExprs[I] bound to the RHS element.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // The helper is internal: it is only referenced through the runtime call.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2418 
/// Emit a 'single' region. The body is guarded by __kmpc_single /
/// __kmpc_end_single; when 'copyprivate' variables are present, a did_it
/// flag records whether this thread executed the body, and a call to
/// __kmpc_copyprivate broadcasts the values to the other threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2506 
2507 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2508                                         const RegionCodeGenTy &OrderedOpGen,
2509                                         SourceLocation Loc, bool IsThreads) {
2510   if (!CGF.HaveInsertPoint())
2511     return;
2512   // __kmpc_ordered(ident_t *, gtid);
2513   // OrderedOpGen();
2514   // __kmpc_end_ordered(ident_t *, gtid);
2515   // Prepare arguments and build a call to __kmpc_ordered
2516   if (IsThreads) {
2517     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2518     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2519                               CGM.getModule(), OMPRTL___kmpc_ordered),
2520                           Args,
2521                           OMPBuilder.getOrCreateRuntimeFunction(
2522                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2523                           Args);
2524     OrderedOpGen.setAction(Action);
2525     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2526     return;
2527   }
2528   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2529 }
2530 
2531 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2532   unsigned Flags;
2533   if (Kind == OMPD_for)
2534     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2535   else if (Kind == OMPD_sections)
2536     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2537   else if (Kind == OMPD_single)
2538     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2539   else if (Kind == OMPD_barrier)
2540     Flags = OMP_IDENT_BARRIER_EXPL;
2541   else
2542     Flags = OMP_IDENT_BARRIER_IMPL;
2543   return Flags;
2544 }
2545 
/// Choose the default schedule for a worksharing loop. For doacross loops
/// (an 'ordered' clause with a loop count) the outputs are forced to
/// schedule(static, 1); otherwise both outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit literal '1' as the chunk expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2563 
/// Emit an (implicit or explicit) barrier. Inside a cancellable region the
/// barrier call is __kmpc_cancel_barrier, and with \p EmitChecks its result
/// is tested to branch out of the construct on cancellation; otherwise a
/// plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2613 
2614 /// Map the OpenMP loop schedule to the runtime enumeration.
2615 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2616                                           bool Chunked, bool Ordered) {
2617   switch (ScheduleKind) {
2618   case OMPC_SCHEDULE_static:
2619     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2620                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2621   case OMPC_SCHEDULE_dynamic:
2622     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2623   case OMPC_SCHEDULE_guided:
2624     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2625   case OMPC_SCHEDULE_runtime:
2626     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2627   case OMPC_SCHEDULE_auto:
2628     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2629   case OMPC_SCHEDULE_unknown:
2630     assert(!Chunked && "chunk was specified but schedule kind not known");
2631     return Ordered ? OMP_ord_static : OMP_sch_static;
2632   }
2633   llvm_unreachable("Unexpected runtime schedule");
2634 }
2635 
2636 /// Map the OpenMP distribute schedule to the runtime enumeration.
2637 static OpenMPSchedType
2638 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2639   // only static is allowed for dist_schedule
2640   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2641 }
2642 
2643 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2644                                          bool Chunked) const {
2645   OpenMPSchedType Schedule =
2646       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2647   return Schedule == OMP_sch_static;
2648 }
2649 
2650 bool CGOpenMPRuntime::isStaticNonchunked(
2651     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2652   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2653   return Schedule == OMP_dist_sch_static;
2654 }
2655 
2656 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                       bool Chunked) const {
2658   OpenMPSchedType Schedule =
2659       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660   return Schedule == OMP_sch_static_chunked;
2661 }
2662 
2663 bool CGOpenMPRuntime::isStaticChunked(
2664     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666   return Schedule == OMP_dist_sch_static_chunked;
2667 }
2668 
2669 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2672   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2673   return Schedule != OMP_sch_static;
2674 }
2675 
2676 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2677                                   OpenMPScheduleClauseModifier M1,
2678                                   OpenMPScheduleClauseModifier M2) {
2679   int Modifier = 0;
2680   switch (M1) {
2681   case OMPC_SCHEDULE_MODIFIER_monotonic:
2682     Modifier = OMP_sch_modifier_monotonic;
2683     break;
2684   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2685     Modifier = OMP_sch_modifier_nonmonotonic;
2686     break;
2687   case OMPC_SCHEDULE_MODIFIER_simd:
2688     if (Schedule == OMP_sch_static_chunked)
2689       Schedule = OMP_sch_static_balanced_chunked;
2690     break;
2691   case OMPC_SCHEDULE_MODIFIER_last:
2692   case OMPC_SCHEDULE_MODIFIER_unknown:
2693     break;
2694   }
2695   switch (M2) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2711   // If the static schedule kind is specified or if the ordered clause is
2712   // specified, and if the nonmonotonic modifier is not specified, the effect is
2713   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2714   // modifier is specified, the effect is as if the nonmonotonic modifier is
2715   // specified.
2716   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2717     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2718           Schedule == OMP_sch_static_balanced_chunked ||
2719           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2720           Schedule == OMP_dist_sch_static_chunked ||
2721           Schedule == OMP_dist_sch_static))
2722       Modifier = OMP_sch_modifier_nonmonotonic;
2723   }
2724   return Schedule | Modifier;
2725 }
2726 
/// Emits the runtime initialization of a dynamically scheduled worksharing
/// loop via the __kmpc_dispatch_init_<4|8>[u] family of entry points.
/// \param IVSize Bit width (32 or 64) of the loop iteration variable.
/// \param IVSigned Signedness of the iteration variable.
/// \param Ordered True if the construct carries an 'ordered' clause.
/// \param DispatchValues Lower/upper bounds and optional chunk expression.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Nothing to emit if the current insertion block is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through emitForStaticInit instead; they may only
  // reach the dispatch path when 'ordered' forces it.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      // Schedule type combined with the monotonic/nonmonotonic modifier bits.
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2759 
/// Shared helper that emits the actual __kmpc_for_static_init call for the
/// worksharing-loop, sections and distribute codegen paths.
/// \param Schedule Runtime schedule; must be one of the static kinds (the
/// dynamic ones go through emitForDispatchInit).
/// \param M1, M2 Schedule modifiers folded into the schedule argument via
/// addMonoNonMonoModifier.
/// \param Values Bounds, stride, last-iteration flag and optional chunk.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  // Nothing to emit if the current insertion block is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  // 'ordered' loops use the dispatch codegen path, never this one.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A chunk may only be omitted for the non-chunked schedule kinds.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2808 
2809 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2810                                         SourceLocation Loc,
2811                                         OpenMPDirectiveKind DKind,
2812                                         const OpenMPScheduleTy &ScheduleKind,
2813                                         const StaticRTInput &Values) {
2814   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2815       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2816   assert(isOpenMPWorksharingDirective(DKind) &&
2817          "Expected loop-based or sections-based directive.");
2818   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2819                                              isOpenMPLoopDirective(DKind)
2820                                                  ? OMP_IDENT_WORK_LOOP
2821                                                  : OMP_IDENT_WORK_SECTIONS);
2822   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2823   llvm::FunctionCallee StaticInitFunction =
2824       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2825   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2826   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2827                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2828 }
2829 
2830 void CGOpenMPRuntime::emitDistributeStaticInit(
2831     CodeGenFunction &CGF, SourceLocation Loc,
2832     OpenMPDistScheduleClauseKind SchedKind,
2833     const CGOpenMPRuntime::StaticRTInput &Values) {
2834   OpenMPSchedType ScheduleNum =
2835       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2836   llvm::Value *UpdatedLocation =
2837       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839   llvm::FunctionCallee StaticInitFunction =
2840       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2841   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2843                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2844 }
2845 
2846 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2847                                           SourceLocation Loc,
2848                                           OpenMPDirectiveKind DKind) {
2849   if (!CGF.HaveInsertPoint())
2850     return;
2851   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2852   llvm::Value *Args[] = {
2853       emitUpdateLocation(CGF, Loc,
2854                          isOpenMPDistributeDirective(DKind)
2855                              ? OMP_IDENT_WORK_DISTRIBUTE
2856                              : isOpenMPLoopDirective(DKind)
2857                                    ? OMP_IDENT_WORK_LOOP
2858                                    : OMP_IDENT_WORK_SECTIONS),
2859       getThreadID(CGF, Loc)};
2860   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2861   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2863                       Args);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2867                                                  SourceLocation Loc,
2868                                                  unsigned IVSize,
2869                                                  bool IVSigned) {
2870   if (!CGF.HaveInsertPoint())
2871     return;
2872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2874   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2875 }
2876 
2877 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2878                                           SourceLocation Loc, unsigned IVSize,
2879                                           bool IVSigned, Address IL,
2880                                           Address LB, Address UB,
2881                                           Address ST) {
2882   // Call __kmpc_dispatch_next(
2883   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2884   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2885   //          kmp_int[32|64] *p_stride);
2886   llvm::Value *Args[] = {
2887       emitUpdateLocation(CGF, Loc),
2888       getThreadID(CGF, Loc),
2889       IL.getPointer(), // &isLastIter
2890       LB.getPointer(), // &Lower
2891       UB.getPointer(), // &Upper
2892       ST.getPointer()  // &Stride
2893   };
2894   llvm::Value *Call =
2895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2896   return CGF.EmitScalarConversion(
2897       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2898       CGF.getContext().BoolTy, Loc);
2899 }
2900 
2901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2902                                            llvm::Value *NumThreads,
2903                                            SourceLocation Loc) {
2904   if (!CGF.HaveInsertPoint())
2905     return;
2906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2907   llvm::Value *Args[] = {
2908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2910   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2911                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2912                       Args);
2913 }
2914 
2915 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2916                                          ProcBindKind ProcBind,
2917                                          SourceLocation Loc) {
2918   if (!CGF.HaveInsertPoint())
2919     return;
2920   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2921   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2922   llvm::Value *Args[] = {
2923       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2924       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2925   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2926                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2927                       Args);
2928 }
2929 
2930 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2931                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2932   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2933     OMPBuilder.createFlush(CGF.Builder);
2934   } else {
2935     if (!CGF.HaveInsertPoint())
2936       return;
2937     // Build call void __kmpc_flush(ident_t *loc)
2938     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                             CGM.getModule(), OMPRTL___kmpc_flush),
2940                         emitUpdateLocation(CGF, Loc));
2941   }
2942 }
2943 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order presumably mirrors the field layout of
/// the runtime's kmp_task_t record — keep it in sync with the runtime and
/// with the record built by the task codegen.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2969 
2970 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2971   return OffloadEntriesTargetRegion.empty() &&
2972          OffloadEntriesDeviceGlobalVar.empty();
2973 }
2974 
/// Initialize target region entry. Device-side only: creates a placeholder
/// entry (null address/ID) at the given position; the address and ID are
/// filled in later by registerTargetRegionEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
2988 
/// Register a target region entry, recording its address, ID and flags.
/// On the device the entry is normally pre-initialized from the host IR
/// metadata and is only completed here; on the host a fresh entry is
/// created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                      OffloadingEntriesNum);
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Silently ignore a duplicate registration of the same target region
    // (address/ID are ignored for the duplicate check).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3019 
3020 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3021     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3022     bool IgnoreAddressId) const {
3023   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3024   if (PerDevice == OffloadEntriesTargetRegion.end())
3025     return false;
3026   auto PerFile = PerDevice->second.find(FileID);
3027   if (PerFile == PerDevice->second.end())
3028     return false;
3029   auto PerParentName = PerFile->second.find(ParentName);
3030   if (PerParentName == PerFile->second.end())
3031     return false;
3032   auto PerLine = PerParentName->second.find(LineNum);
3033   if (PerLine == PerParentName->second.end())
3034     return false;
3035   // Fail if this entry is already registered.
3036   if (!IgnoreAddressId &&
3037       (PerLine->second.getAddress() || PerLine->second.getID()))
3038     return false;
3039   return true;
3040 }
3041 
3042 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3043     const OffloadTargetRegionEntryInfoActTy &Action) {
3044   // Scan all target region entries and perform the provided action.
3045   for (const auto &D : OffloadEntriesTargetRegion)
3046     for (const auto &F : D.second)
3047       for (const auto &P : F.second)
3048         for (const auto &L : P.second)
3049           Action(D.first, F.first, P.first(), L.first, L.second);
3050 }
3051 
/// Initialize a device global variable entry. Device-side only: creates a
/// placeholder entry for the mangled name; the address, size and linkage are
/// filled in later by registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3062 
/// Register a declare target global variable entry, recording its address,
/// size, flags and linkage. On the device the entry may already exist
/// (initialized from the host IR metadata) and is completed here; on the
/// host a new entry is created unless one with the same mangled name was
/// registered before.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // The address is already set; only complete the size/linkage when a
      // prior registration saw a declaration without a definition (size 0).
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Re-registration on the host: keep the existing entry but complete
      // its size/linkage when the first registration had no definition.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3103 
3104 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3105     actOnDeviceGlobalVarEntriesInfo(
3106         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3107   // Scan all target region entries and perform the provided action.
3108   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3109     Action(E.getKey(), E.getValue());
3110 }
3111 
/// Create one global __tgt_offload_entry structure describing an offload
/// entry (target region or declare target global) and place it in the
/// 'omp_offloading_entries' section, where the linker gathers all entries.
/// \param ID Unique identifier constant for the entry.
/// \param Addr Entity address; its symbol name also names the entry.
/// \param Size Size in bytes (0 for functions).
/// \param Flags Entry-kind-specific flags.
/// \param Linkage NOTE(review): not used in this function — the entry is
/// always emitted with weak linkage below; confirm whether it should apply.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field initializers matching the __tgt_offload_entry layout:
  // {addr, name, size, flags, reserved}.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3142 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the entry's registration order so the
  // diagnostics emitted below can map an entry back to its source location
  // and parent function.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for the entry from its (DeviceID, FileID,
        // Line) triple, for use in the diagnostics emitted further below.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable
  // entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual __tgt_offload_entry structures, diagnosing entries that
  // were initialized but never received a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // On the device, 'to' entries are skipped when unified shared memory
        // is required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' entries only have an address on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3316 
/// Loads all the offload entries information from the host IR
/// metadata. Device-side only; pre-populates OffloadEntriesInfoManager so
/// the device compilation emits entries in the same order as the host.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a temporary module just to read its metadata; the
  // module is discarded when this function returns.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands of a single metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operands follow the
    // layout produced by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3385 
3386 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3387   if (!KmpRoutineEntryPtrTy) {
3388     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3389     ASTContext &C = CGM.getContext();
3390     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3391     FunctionProtoType::ExtProtoInfo EPI;
3392     KmpRoutineEntryPtrQTy = C.getPointerType(
3393         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3394     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3395   }
3396 }
3397 
3398 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3399   // Make sure the type of the entry is already created. This is the type we
3400   // have to create:
3401   // struct __tgt_offload_entry{
3402   //   void      *addr;       // Pointer to the offload entry info.
3403   //                          // (function or global)
3404   //   char      *name;       // Name of the function or global.
3405   //   size_t     size;       // Size of the entry info (0 if it a function).
3406   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3407   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3408   // };
3409   if (TgtOffloadEntryQTy.isNull()) {
3410     ASTContext &C = CGM.getContext();
3411     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3412     RD->startDefinition();
3413     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3414     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3415     addFieldToRecordDecl(C, RD, C.getSizeType());
3416     addFieldToRecordDecl(
3417         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3418     addFieldToRecordDecl(
3419         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3420     RD->completeDefinition();
3421     RD->addAttr(PackedAttr::CreateImplicit(C));
3422     TgtOffloadEntryQTy = C.getRecordType(RD);
3423   }
3424   return TgtOffloadEntryQTy;
3425 }
3426 
namespace {
/// Bundles the AST declarations needed to emit one privatized variable of a
/// task-based directive. Two flavors are built: clause-based privates (all
/// four members set via the 4-argument constructor) and compiler-generated
/// local privates (only \a Original set via the 1-argument constructor).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression for the original variable (null for local privates).
  const Expr *OriginalRef = nullptr;
  // The original variable being privatized.
  const VarDecl *Original = nullptr;
  // The private copy used inside the task (null for local privates).
  const VarDecl *PrivateCopy = nullptr;
  // Element declaration used when initializing array private copies
  // element-by-element (null for local privates).
  const VarDecl *PrivateElemInit = nullptr;
  /// True iff this describes a compiler-generated local private, i.e. it was
  /// built with the single-argument constructor.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
// Required alignment paired with the private variable description; used to
// order/lay out fields in the generated privates record.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3444 
3445 static bool isAllocatableDecl(const VarDecl *VD) {
3446   const VarDecl *CVD = VD->getCanonicalDecl();
3447   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3448     return false;
3449   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3450   // Use the default allocation.
3451   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3452             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3453            !AA->getAllocator());
3454 }
3455 
3456 static RecordDecl *
3457 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3458   if (!Privates.empty()) {
3459     ASTContext &C = CGM.getContext();
3460     // Build struct .kmp_privates_t. {
3461     //         /*  private vars  */
3462     //       };
3463     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3464     RD->startDefinition();
3465     for (const auto &Pair : Privates) {
3466       const VarDecl *VD = Pair.second.Original;
3467       QualType Type = VD->getType().getNonReferenceType();
3468       // If the private variable is a local variable with lvalue ref type,
3469       // allocate the pointer instead of the pointee type.
3470       if (Pair.second.isLocalPrivate()) {
3471         if (VD->getType()->isLValueReferenceType())
3472           Type = C.getPointerType(Type);
3473         if (isAllocatableDecl(VD))
3474           Type = C.getPointerType(Type);
3475       }
3476       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3477       if (VD->hasAttrs()) {
3478         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3479              E(VD->getAttrs().end());
3480              I != E; ++I)
3481           FD->addAttr(*I);
3482       }
3483     }
3484     RD->completeDefinition();
3485     return RD;
3486   }
3487   return nullptr;
3488 }
3489 
3490 static RecordDecl *
3491 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3492                          QualType KmpInt32Ty,
3493                          QualType KmpRoutineEntryPointerQTy) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t {
3496   //         void *              shareds;
3497   //         kmp_routine_entry_t routine;
3498   //         kmp_int32           part_id;
3499   //         kmp_cmplrdata_t data1;
3500   //         kmp_cmplrdata_t data2;
3501   // For taskloops additional fields:
3502   //         kmp_uint64          lb;
3503   //         kmp_uint64          ub;
3504   //         kmp_int64           st;
3505   //         kmp_int32           liter;
3506   //         void *              reductions;
3507   //       };
3508   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3509   UD->startDefinition();
3510   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3511   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3512   UD->completeDefinition();
3513   QualType KmpCmplrdataTy = C.getRecordType(UD);
3514   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3515   RD->startDefinition();
3516   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3517   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3518   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3521   if (isOpenMPTaskLoopDirective(Kind)) {
3522     QualType KmpUInt64Ty =
3523         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3524     QualType KmpInt64Ty =
3525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3529     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3530     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3531   }
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
3536 static RecordDecl *
3537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3538                                      ArrayRef<PrivateDataTy> Privates) {
3539   ASTContext &C = CGM.getContext();
3540   // Build struct kmp_task_t_with_privates {
3541   //         kmp_task_t task_data;
3542   //         .kmp_privates_t. privates;
3543   //       };
3544   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3545   RD->startDefinition();
3546   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3547   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3548     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3549   RD->completeDefinition();
3550   return RD;
3551 }
3552 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the fixed runtime entry signature: (kmp_int32 gtid, kmp_task_t *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Internal linkage: the entry is only referenced by this TU's task alloc.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the kmp_task_t_with_privates* argument once; all field
  // accesses below are relative to TDBase.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t descriptor.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (the outlined function may update it).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds struct type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field only exists when the task has private variables;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb/ub/st/liter/reductions loaded from the
  // kmp_task_t descriptor, in that order, before the shareds pointer.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3667 
/// Emit the task destructor function invoked by the runtime before a task's
/// storage is freed. It walks the privates record of
/// kmp_task_t_with_privates and emits a destroy for every field whose type
/// requires destruction. Signature: kmp_int32 (kmp_int32 gtid, tt*).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same fixed runtime signature as the task entry: (gtid, task descriptor).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each privates field with a destruction kind;
  // FinishFunction emits them in reverse order.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3716 
3717 /// Emit a privates mapping function for correct handling of private and
3718 /// firstprivate variables.
3719 /// \code
3720 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3721 /// **noalias priv1,...,  <tyn> **noalias privn) {
3722 ///   *priv1 = &.privates.priv1;
3723 ///   ...;
3724 ///   *privn = &.privates.privn;
3725 /// }
3726 /// \endcode
3727 static llvm::Value *
3728 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3729                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3730                                ArrayRef<PrivateDataTy> Privates) {
3731   ASTContext &C = CGM.getContext();
3732   FunctionArgList Args;
3733   ImplicitParamDecl TaskPrivatesArg(
3734       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3735       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3736       ImplicitParamDecl::Other);
3737   Args.push_back(&TaskPrivatesArg);
3738   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3739   unsigned Counter = 1;
3740   for (const Expr *E : Data.PrivateVars) {
3741     Args.push_back(ImplicitParamDecl::Create(
3742         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3743         C.getPointerType(C.getPointerType(E->getType()))
3744             .withConst()
3745             .withRestrict(),
3746         ImplicitParamDecl::Other));
3747     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3748     PrivateVarsPos[VD] = Counter;
3749     ++Counter;
3750   }
3751   for (const Expr *E : Data.FirstprivateVars) {
3752     Args.push_back(ImplicitParamDecl::Create(
3753         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754         C.getPointerType(C.getPointerType(E->getType()))
3755             .withConst()
3756             .withRestrict(),
3757         ImplicitParamDecl::Other));
3758     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3759     PrivateVarsPos[VD] = Counter;
3760     ++Counter;
3761   }
3762   for (const Expr *E : Data.LastprivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const VarDecl *VD : Data.PrivateLocals) {
3774     QualType Ty = VD->getType().getNonReferenceType();
3775     if (VD->getType()->isLValueReferenceType())
3776       Ty = C.getPointerType(Ty);
3777     if (isAllocatableDecl(VD))
3778       Ty = C.getPointerType(Ty);
3779     Args.push_back(ImplicitParamDecl::Create(
3780         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3782         ImplicitParamDecl::Other));
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   const auto &TaskPrivatesMapFnInfo =
3787       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788   llvm::FunctionType *TaskPrivatesMapTy =
3789       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790   std::string Name =
3791       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792   auto *TaskPrivatesMap = llvm::Function::Create(
3793       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794       &CGM.getModule());
3795   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796                                     TaskPrivatesMapFnInfo);
3797   if (CGM.getLangOpts().Optimize) {
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801   }
3802   CodeGenFunction CGF(CGM);
3803   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805 
3806   // *privi = &.privates.privi;
3807   LValue Base = CGF.EmitLoadOfPointerLValue(
3808       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809       TaskPrivatesArg.getType()->castAs<PointerType>());
3810   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811   Counter = 0;
3812   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815     LValue RefLVal =
3816         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820     ++Counter;
3821   }
3822   CGF.FinishFunction();
3823   return TaskPrivatesMap;
3824 }
3825 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds of the (source) task; may
///        be invalid when no firstprivate data needs copying.
/// \param TDBase LValue of the destination kmp_task_t_with_privates object.
/// \param ForDup True when emitting the body of the task_dup function (only
///        non-trivial constructor-based inits are re-run there).
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates list
  // (they were built in the same order).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor-based initializers are
    // re-emitted; everything else was already handled for the source task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit set => firstprivate-style init from a shared source.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value through the shareds of the source task,
          // using the original variable's natural alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit through the
          // original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the init element to the shared
          // address, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private variable: just run its own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3947 
3948 /// Check if duplication function is required for taskloops.
3949 static bool checkInitIsRequired(CodeGenFunction &CGF,
3950                                 ArrayRef<PrivateDataTy> Privates) {
3951   bool InitRequired = false;
3952   for (const PrivateDataTy &Pair : Privates) {
3953     if (Pair.second.isLocalPrivate())
3954       continue;
3955     const VarDecl *VD = Pair.second.PrivateCopy;
3956     const Expr *Init = VD->getAnyInitializer();
3957     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3958                                     !CGF.isTrivialInitializer(Init));
3959     if (InitRequired)
3960       break;
3961   }
3962   return InitRequired;
3963 }
3964 
3965 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Runtime dup signature: (task_dst, task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Destination task descriptor; all stores below go through it.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds; load that
    // pointer here (note: this TDBase intentionally shadows the outer one).
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Re-run the private initializers in dup mode (ForDup=true).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4044 
4045 /// Checks if destructor function is required to be generated.
4046 /// \return true if cleanups are required, false otherwise.
4047 static bool
4048 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4049                          ArrayRef<PrivateDataTy> Privates) {
4050   for (const PrivateDataTy &P : Privates) {
4051     if (P.second.isLocalPrivate())
4052       continue;
4053     QualType Ty = P.second.Original->getType().getNonReferenceType();
4054     if (Ty.isDestructedType())
4055       return true;
4056   }
4057   return false;
4058 }
4059 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes the iterator variables and their
/// helper counters and emits the loop headers (one "iter.cont" block with a
/// `counter < upper-bound` test per iterator, falling into "iter.body"); the
/// destructor emits the counter increments, the back-branches to "iter.cont"
/// and the "iter.exit" blocks, closing the loop nest innermost-first. Code
/// emitted between construction and destruction runs once per iteration of
/// the full iterator space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit jump destination per iterator, created by the
  // constructor and consumed in reverse order by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// \param E The iterator expression, or null for a no-op scope.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound eagerly, before the loop nest is entered.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Private storage for the iterator variable itself.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // Private storage for the helper counter driving this iterator.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned comparison matching the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4138 
4139 static std::pair<llvm::Value *, llvm::Value *>
4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4141   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4142   llvm::Value *Addr;
4143   if (OASE) {
4144     const Expr *Base = OASE->getBase();
4145     Addr = CGF.EmitScalarExpr(Base);
4146   } else {
4147     Addr = CGF.EmitLValue(E).getPointer(CGF);
4148   }
4149   llvm::Value *SizeVal;
4150   QualType Ty = E->getType();
4151   if (OASE) {
4152     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4153     for (const Expr *SE : OASE->getDimensions()) {
4154       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4155       Sz = CGF.EmitScalarConversion(
4156           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4157       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4158     }
4159   } else if (const auto *ASE =
4160                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4161     LValue UpAddrLVal =
4162         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4163     llvm::Value *UpAddr =
4164         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4165     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4166     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4167     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4168   } else {
4169     SizeVal = CGF.getTypeSize(Ty);
4170   }
4171   return std::make_pair(Addr, SizeVal);
4172 }
4173 
4174 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4175 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4176   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4177   if (KmpTaskAffinityInfoTy.isNull()) {
4178     RecordDecl *KmpAffinityInfoRD =
4179         C.buildImplicitRecord("kmp_task_affinity_info_t");
4180     KmpAffinityInfoRD->startDefinition();
4181     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4182     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4183     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4184     KmpAffinityInfoRD->completeDefinition();
4185     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4186   }
4187 }
4188 
/// Emits the setup code for a task-generating directive: builds the
/// kmp_task_t-with-privates record for this task, allocates the task via
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc when a 'nowait'
/// clause is present), copies shareds into the task, initializes private
/// copies, and fills in detach/affinity/destructor/priority data.
/// \return a TaskResultTy bundling the new task, its proxy entry function,
/// the typed task pointer, and the kmp_task_t base lvalue/record.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // private(...) vars: copy decl, no initializer expression.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // firstprivate(...) vars: copy decl plus the element-init decl.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // lastprivate(...) vars: copy decl, no initializer expression.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored via a pointer, hence pointer alignment.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment so the privates record needs minimal
  // padding; stable_sort keeps the relative order of equally-aligned vars.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // wider record than plain task/target directives, so each family caches
  // its own record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer to the task entry.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime expression (select) or a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements stays null if no clause has an iterator modifier, in
    // which case the count (NumAffinities) is known at compile time.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized array: total = iterator-produced count + fixed count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized array of NumAffinities elements.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-modified clauses fill positions at run time; materialize the
      // current position as a counter in memory.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops with lastprivates or init-requiring privates also need a
    // task-duplication function for the runtime.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4578 
namespace {
/// Dependence kind for RTL.
/// Values match the runtime's kmp_depend_info flag encoding; see
/// translateDependencyKind for the mapping from OpenMPDependClauseKind.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,  // also used for plain 'out' dependencies
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4589 
4590 /// Translates internal dependency kind into the runtime kind.
4591 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4592   RTLDependenceKindTy DepKind;
4593   switch (K) {
4594   case OMPC_DEPEND_in:
4595     DepKind = DepIn;
4596     break;
4597   // Out and InOut dependencies must use the same code.
4598   case OMPC_DEPEND_out:
4599   case OMPC_DEPEND_inout:
4600     DepKind = DepInOut;
4601     break;
4602   case OMPC_DEPEND_mutexinoutset:
4603     DepKind = DepMutexInOutSet;
4604     break;
4605   case OMPC_DEPEND_source:
4606   case OMPC_DEPEND_sink:
4607   case OMPC_DEPEND_depobj:
4608   case OMPC_DEPEND_unknown:
4609     llvm_unreachable("Unknown task dependence type");
4610   }
4611   return DepKind;
4612 }
4613 
4614 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4615 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4616                            QualType &FlagsTy) {
4617   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4618   if (KmpDependInfoTy.isNull()) {
4619     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4620     KmpDependInfoRD->startDefinition();
4621     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4622     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4623     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4624     KmpDependInfoRD->completeDefinition();
4625     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4626   }
4627 }
4628 
/// Loads the dependency array referenced by a depobj lvalue.
/// \returns the number of dependencies and an LValue on the first
/// kmp_depend_info element of the array. The count is read from the
/// base_addr field of the kmp_depend_info entry stored immediately before
/// the array (at index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* to the dependency array; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header entry that precedes the array.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4657 
/// Emits kmp_depend_info entries for the dependencies in \p Data into
/// \p DependenciesArray, starting at position \p Pos.
///
/// \param Pos Either a compile-time counter (unsigned*), used when the
/// number of dependencies is statically known, or an LValue holding a
/// runtime counter, used when the list is generated under an iterator
/// expression. The counter is advanced past the emitted entries.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, open the surrounding loop nest;
  // the scope is a no-op when the iterator expression is null.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Statically known slot: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime slot: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the static counter, or store the
    // incremented runtime counter back.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4716 
/// Emits code that computes, for each depobj expression in
/// \p Data.DepExprs, the number of kmp_depend_info records stored in that
/// depobj. The count is read from the base_addr field of the sentinel
/// element located one record before the depobj's dependency array.
/// \returns One runtime llvm::Value per dependency expression holding the
/// (possibly iterator-accumulated) element count.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Scope the iterator codegen so its loop(s) close before the final
    // loads below; the per-expression temporaries survive the scope.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj handle and view it as a kmp_depend_info pointer.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record to the sentinel holding the element count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a zero-initialized temporary so iterator
      // iterations sum up their per-iteration counts.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Outside the iterator scope: materialize the accumulated counts.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4774 
/// Emits code that copies the kmp_depend_info records stored in each depobj
/// in \p Data.DepExprs into \p DependenciesArray at the runtime position
/// held by \p PosLVal, advancing the position by the number of records
/// copied from each depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size in bytes of a single kmp_depend_info record, for the memcpy below.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // If an iterator modifier is present, the copy below runs per iteration.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and view it as a kmp_depend_info pointer.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj from the sentinel record
      // stored one element before the dependency array.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps records of ElSize bytes each.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos by the number of records copied (not by the byte size).
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4835 
// Emits the dependency array for a task's depend clauses and returns the
// runtime number of elements plus the (void*-cast) array address. Regular
// dependencies without iterators go first, then regular dependencies with
// iterators, then the contents of depobj dependencies. Returns
// {nullptr, invalid} when there are no dependency expressions at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable dependencies: regular ones without an iterator
  // modifier. depobj and iterator counts are only known at runtime.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Total = product of the upper bounds of all iterator dimensions.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized case: total = static count + depobj count + iterator
    // count; allocate a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of a variable array type.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static case: a constant-sized array on the stack.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First: regular dependencies without iterators, tracked with a
  // compile-time counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators, switching to a runtime
  // counter seeded with the static position reached above.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4956 
// Emits the heap-allocated dependency array backing an 'omp depobj'
// construct. The array is allocated via __kmpc_alloc with one extra
// leading sentinel record whose base_addr field stores the number of
// dependency records; the returned address points past that sentinel,
// cast to void*. Returns an invalid address when there are no
// dependency expressions.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of the upper bounds of all iterator
    // dimensions; byte size = (count + 1 sentinel) * sizeof(record).
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the whole (count + 1)-element array at once.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 is the sentinel); use a
  // runtime counter in the iterator case, a compile-time one otherwise.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record (past the sentinel) as
  // void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5039 
// Emits the 'destroy' form of 'omp depobj': frees the heap allocation
// made by emitDepobjDependClause. The stored handle points past the
// sentinel record, so step back one element to recover the address that
// was returned by __kmpc_alloc before passing it to __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one kmp_depend_info record to the true allocation start.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5066 
// Emits the 'update' form of 'omp depobj': rewrites the flags field of
// every kmp_depend_info record in the depobj's array to \p NewDepKind,
// using an emitted pointer-walking while-do loop over the records.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the element count and the start of the record array.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5112 
// Emits a call for the 'task' directive: allocates and initializes the
// task via emitTaskInit, emits the dependency array (if any), then either
// enqueues the task (__kmpc_omp_task[_with_deps]) or — under a false 'if'
// clause — executes it immediately and undeferred between
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0, waiting on
// dependencies first via __kmpc_omp_wait_deps.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'if' clause false: run the task body inline as an undeferred task.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5230 
// Emits a call for the 'taskloop' directive: initializes the task object,
// stores the loop bounds/stride and reductions pointer into the
// kmp_task_t, and invokes __kmpc_taskloop with the scheduling arguments
// derived from the grainsize/num_tasks clauses.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause is passed to the runtime as an int flag (1 when absent)
  // rather than being handled with separate then/else codegen paths.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb, ub and st fields of the task descriptor from the
  // loop directive's bound/stride variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kinds understood by __kmpc_taskloop; Data.Schedule's int bit
  // distinguishes num_tasks (1) from grainsize (0).
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5316 
5317 /// Emit reduction operation for each element of array (required for
5318 /// array sections) LHS op = RHS.
5319 /// \param Type Type of array.
5320 /// \param LHSVar Variable on the left side of the reduction operation
5321 /// (references element of array in original variable).
5322 /// \param RHSVar Variable on the right side of the reduction operation
5323 /// (references element of array in original variable).
5324 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5325 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array has zero elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source (RHS) and destination (LHS) element
  // pointers across loop iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated combiner operates on one element per iteration.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5396 
5397 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5398 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5399 /// UDR combiner function.
5400 static void emitReductionCombiner(CodeGenFunction &CGF,
5401                                   const Expr *ReductionOp) {
5402   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5403     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5404       if (const auto *DRE =
5405               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5406         if (const auto *DRD =
5407                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5408           std::pair<llvm::Function *, llvm::Function *> Reduction =
5409               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5410           RValue Func = RValue::get(Reduction.first);
5411           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5412           CGF.EmitIgnoredExpr(ReductionOp);
5413           return;
5414         }
5415   CGF.EmitIgnoredExpr(ReductionOp);
5416 }
5417 
/// Emits the outlined reduce_func passed to __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg)
/// Both arguments point to arrays of void* holding the addresses of the
/// reduction items; each item is combined in place via the corresponding
/// expression from \p ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the address stored in the corresponding
  // slot of the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA items occupy an extra slot in the argument array that carries the
      // element count encoded as a pointer; decode it and materialize the
      // variably-modified type before use.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5509 
5510 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5511                                                   const Expr *ReductionOp,
5512                                                   const Expr *PrivateRef,
5513                                                   const DeclRefExpr *LHS,
5514                                                   const DeclRefExpr *RHS) {
5515   if (PrivateRef->getType()->isArrayType()) {
5516     // Emit reduction for array section.
5517     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5518     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5519     EmitOMPAggregateReduction(
5520         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5521         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5522           emitReductionCombiner(CGF, ReductionOp);
5523         });
5524   } else {
5525     // Emit reduction for array subscript or single variable.
5526     emitReductionCombiner(CGF, ReductionOp);
5527   }
5528 }
5529 
/// Emits the full reduction sequence: either inlined combiners (when
/// Options.SimpleReduction is set) or the __kmpc_reduce{_nowait} call with a
/// switch over its result (case 1: non-atomic combine + end_reduce; case 2:
/// atomic/critical combine).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit each combiner inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The VLA element count is smuggled through the void* slot as an
      // inttoptr value; the reduction function decodes it with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer 'BO' below intentionally shadows the
      // BinaryOperatorKind 'BO' above; the kind is filled in later from
      // BORHS when an update pattern is recognized.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        // Recognized an 'x = x op e' style update: emit it as an atomic
        // update on the shared LHS.
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value of X into a
                // temporary remapped as VD, then re-evaluate the update expr.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5836 
5837 /// Generates unique name for artificial threadprivate variables.
5838 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5839 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5840                                       const Expr *Ref) {
5841   SmallString<256> Buffer;
5842   llvm::raw_svector_ostream Out(Buffer);
5843   const clang::DeclRefExpr *DE;
5844   const VarDecl *D = ::getBaseDecl(Ref, DE);
5845   if (!D)
5846     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5847   D = D->getCanonicalDecl();
5848   std::string Name = CGM.getOpenMPRuntime().getName(
5849       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5850   Out << Prefix << Name << "_"
5851       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5852   return std::string(Out.str());
5853 }
5854 
5855 /// Emits reduction initializer function:
5856 /// \code
5857 /// void @.red_init(void* %arg, void* %orig) {
5858 /// %0 = bitcast void* %arg to <type>*
5859 /// store <type> <init>, <type>* %0
5860 /// ret void
5861 /// }
5862 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg is the address of the private copy to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer); otherwise pass a null pointer.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5923 
5924 /// Emits reduction combiner function:
5925 /// \code
5926 /// void @.red_comb(void* %arg0, void* %arg1) {
5927 /// %lhs = bitcast void* %arg0 to <type>*
5928 /// %rhs = bitcast void* %arg1 to <type>*
5929 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5930 /// store <type> %2, <type>* %lhs
5931 /// ret void
5932 /// }
5933 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0 is the in/out (lhs) item, %arg1 the incoming (rhs) item.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6001 
6002 /// Emits reduction finalizer function:
6003 /// \code
6004 /// void @.red_fini(void* %arg) {
6005 /// %0 = bitcast void* %arg to <type>*
6006 /// <destroy>(<type>* %0)
6007 /// ret void
6008 /// }
6009 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed when the item has no cleanups (e.g. trivially
  // destructible); callers treat a null return as "no fini function".
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg is the address of the private copy to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6050 
/// Emits the task reduction initialization: builds an on-stack array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/original addresses, size, and init/fini/comb routines) and passes it
/// to __kmpc_taskred_init or, for reductions with a task modifier, to
/// __kmpc_taskred_modifier_init. Returns the runtime call's result, or
/// nullptr if there is no insert point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // emitReduceFiniFunction returns nullptr when no cleanups are needed; the
    // descriptor then carries a null fini pointer.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 tells the runtime this item uses delayed creation.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6179 
/// Emits the finalization call paired with __kmpc_taskred_modifier_init.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6197 
/// For reduction item N with a non-constant (runtime-computed) size, stores
/// that size into an artificial threadprivate variable so the separately
/// emitted init/combiner/finalizer functions can reload it.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable only if the size is non-constant,
  // i.e. Sizes.second is non-null. (Constant-sized items have
  // Sizes.second == nullptr and need no fixup.)
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    // The unique name must match the one used when the helper functions load
    // the size back (see e.g. emitReduceFiniFunction).
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6214 
6215 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6216                                               SourceLocation Loc,
6217                                               llvm::Value *ReductionsPtr,
6218                                               LValue SharedLVal) {
6219   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6220   // *d);
6221   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6222                                                    CGM.IntTy,
6223                                                    /*isSigned=*/true),
6224                          ReductionsPtr,
6225                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6226                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6227   return Address(
6228       CGF.EmitRuntimeCall(
6229           OMPBuilder.getOrCreateRuntimeFunction(
6230               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6231           Args),
6232       SharedLVal.getAlignment());
6233 }
6234 
6235 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6236                                        SourceLocation Loc) {
6237   if (!CGF.HaveInsertPoint())
6238     return;
6239 
6240   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6241     OMPBuilder.createTaskwait(CGF.Builder);
6242   } else {
6243     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6244     // global_tid);
6245     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6246     // Ignore return result until untied tasks are supported.
6247     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6248                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6249                         Args);
6250   }
6251 
6252   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6253     Region->emitUntiedSwitch(CGF);
6254 }
6255 
6256 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6257                                            OpenMPDirectiveKind InnerKind,
6258                                            const RegionCodeGenTy &CodeGen,
6259                                            bool HasCancel) {
6260   if (!CGF.HaveInsertPoint())
6261     return;
6262   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6263                                  InnerKind != OMPD_critical &&
6264                                      InnerKind != OMPD_master &&
6265                                      InnerKind != OMPD_masked);
6266   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6267 }
6268 
namespace {
/// Cancellation kinds passed to the runtime as the 'cncl_kind' argument of
/// __kmpc_cancel / __kmpc_cancellationpoint. The numeric values are part of
/// the runtime interface — do not renumber.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation region
  CancelParallel = 1,  // 'parallel' region
  CancelLoop = 2,      // worksharing loop ('for') region
  CancelSections = 3,  // 'sections' region
  CancelTaskgroup = 4  // 'taskgroup' region
};
} // anonymous namespace
6278 
6279 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6280   RTCancelKind CancelKind = CancelNoreq;
6281   if (CancelRegion == OMPD_parallel)
6282     CancelKind = CancelParallel;
6283   else if (CancelRegion == OMPD_for)
6284     CancelKind = CancelLoop;
6285   else if (CancelRegion == OMPD_sections)
6286     CancelKind = CancelSections;
6287   else {
6288     assert(CancelRegion == OMPD_taskgroup);
6289     CancelKind = CancelTaskgroup;
6290   }
6291   return CancelKind;
6292 }
6293 
/// Emits code for a 'cancellation point' directive: calls
/// __kmpc_cancellationpoint and, if it signals pending cancellation, branches
/// through cleanups to the enclosing region's cancel destination.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; the branch runs all active cleanups on the way
      // out of the cancelled region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6330 
/// Emits code for a 'cancel' directive: calls __kmpc_cancel (possibly guarded
/// by an 'if' clause condition) and, on a nonzero result, branches through
/// cleanups to the enclosing region's cancel destination.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual emission is wrapped in a lambda so it can be used either
    // directly or as the 'then' branch of an 'if' clause.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel by the 'if' clause condition; the 'else' branch is a
      // no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6373 
namespace {
/// Cleanup action for uses_allocators support: initializes every listed
/// allocator on region entry and destroys it on region exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expression, allocator-traits expression) pairs; only entries
  /// with non-null traits are expected here (see emitTargetOutlinedFunction).
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emits an __kmpc_init_allocator call for each allocator before the region
  /// body.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emits an __kmpc_destroy_allocator call for each allocator after the
  /// region body.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6401 
6402 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6403     const OMPExecutableDirective &D, StringRef ParentName,
6404     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6405     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6406   assert(!ParentName.empty() && "Invalid target region parent name!");
6407   HasEmittedTargetRegion = true;
6408   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6409   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6410     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6411       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6412       if (!D.AllocatorTraits)
6413         continue;
6414       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6415     }
6416   }
6417   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6418   CodeGen.setAction(UsesAllocatorAction);
6419   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6420                                    IsOffloadEntry, CodeGen);
6421 }
6422 
/// Emits initialization of a single 'uses_allocators' allocator: calls
/// __kmpc_init_allocator with the traits array and stores the resulting
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = number of elements of the constant-size traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array's address as a void* for the runtime call.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator. The allocator variable is declared here before being
  // assigned the handle returned by the runtime.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6457 
/// Emits destruction of a single 'uses_allocators' allocator: loads the
/// handle from the allocator variable and calls __kmpc_destroy_allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Convert the handle back to void* for the runtime call (inverse of the
  // conversion done in emitUsesAllocatorsInit).
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
6473 
/// Outlines the captured statement of a target region into a uniquely named
/// function, computes the target region ID, and registers the offload entry.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN device entry points use the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a constant i8 global whose address serves as the unique
    // region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6542 
6543 /// Checks if the expression is constant or does not have non-trivial function
6544 /// calls.
6545 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6546   // We can skip constant expressions.
6547   // We can skip expressions with trivial calls or simple expressions.
6548   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6549           !E->hasNonTrivialCall(Ctx)) &&
6550          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6551 }
6552 
/// Strips containers and "ignorable" statements to find the single meaningful
/// child of \p Body, or returns nullptr if there is more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, at each level keeping at most
  // one non-ignorable child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial expressions (constant / no non-trivial calls, no side
        // effects) do not count as children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only when all its declarations are: type
        // declarations, pragmas, usings, OpenMP decls, or variables that are
        // constexpr or trivially typed with a trivial (or absent)
        // initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers from the surviving child before the next iteration.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6597 
6598 /// Emit the number of teams for a target directive.  Inspect the num_teams
6599 /// clause associated with a teams construct combined or closely nested
6600 /// with the target directive.
6601 ///
6602 /// Emit a team of size one for directives such as 'target parallel' that
6603 /// have no associated teams construct.
6604 ///
6605 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': look at the single directive (if any) nested inside
    // the captured region to decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Emit the nested teams directive's num_teams expression in the
          // context of the enclosing captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: let the runtime decide (0).
        return Bld.getInt32(0);
      }
      // Nested parallel or simd region: a single team is used.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No nested directive found; the caller has to handle this case.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: a num_teams clause, if present, is
    // attached to this very directive.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives without a teams construct always use a single team.
    return Bld.getInt32(1);
  // All remaining kinds are not target-based executable directives and are
  // ruled out by the assertion at the top of the function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6731 
/// Compute the number of threads implied by the single directive (if any)
/// nested in the captured statement \p CS:
///  * nested parallel region: <cond> ? (<numthreads> ? min(<numthreads>,
///    limit) : 0) : 1, where <cond> and <numthreads> come from its
///    if/num_threads clauses and 'limit' is \p DefaultThreadLimitVal;
///  * nested simd region: 1;
///  * any other nested directive: \p DefaultThreadLimitVal (may be null);
///  * no nested directive: \p DefaultThreadLimitVal, or 0 if it is null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause that applies to 'parallel' (explicitly or with
        // no name modifier) is relevant here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to false: the region is serialized, i.e. runs
            // with exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the num_threads expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default limit if one was given:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: 0 means "let the runtime choose".
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6823 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr (meaning no clause constrains the number of
/// threads; note getNumThreads may propagate a null default limit).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the thread count from whatever directive is
    // nested inside the captured region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive may carry a thread_limit clause.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the thread_limit expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For 'teams' without 'distribute', descend one more level to find the
      // directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // A nested 'distribute' region is inspected as well.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an 'if' clause that applies to 'parallel' (explicitly or with no
      // name modifier) is relevant here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine both clauses: min(num_threads, thread_limit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // 0 means "let the runtime choose".
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining kinds are not target-based executable directives and are
  // ruled out by the assertion at the top of the function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7049 
7050 namespace {
7051 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7052 
7053 // Utility to handle information from clauses associated with a given
7054 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7055 // It provides a convenient interface to obtain the information and generate
7056 // code for that information.
7057 class MappableExprsHandler {
7058 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These values are combined bitwise and passed to the
  /// offloading runtime to describe each map entry.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Enables bitwise operators on this enum; the largest flag value sizes
    // the underlying bitmask.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7106 
7107   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7108   static unsigned getFlagMemberOffset() {
7109     unsigned Offset = 0;
7110     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7111          Remain = Remain >> 1)
7112       Offset++;
7113     return Offset;
7114   }
7115 
7116   /// Class that holds debugging information for a data mapping to be passed to
7117   /// the runtime library.
7118   class MappingExprInfo {
7119     /// The variable declaration used for the data mapping.
7120     const ValueDecl *MapDecl = nullptr;
7121     /// The original expression used in the map clause, or null if there is
7122     /// none.
7123     const Expr *MapExpr = nullptr;
7124 
7125   public:
7126     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7127         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7128 
7129     const ValueDecl *getMapDecl() const { return MapDecl; }
7130     const Expr *getMapExpr() const { return MapExpr; }
7131   };
7132 
7133   /// Class that associates information with a base pointer to be passed to the
7134   /// runtime library.
7135   class BasePointerInfo {
7136     /// The base pointer.
7137     llvm::Value *Ptr = nullptr;
7138     /// The base declaration that refers to this device pointer, or null if
7139     /// there is none.
7140     const ValueDecl *DevPtrDecl = nullptr;
7141 
7142   public:
7143     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7144         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7145     llvm::Value *operator*() const { return Ptr; }
7146     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7147     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7148   };
7149 
  // Element-array aliases for the parallel arrays that describe a set of map
  // entries (expressions, base pointers, pointers, flags, mappers, dims).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7157 
7158   /// This structure contains combined information generated for mappable
7159   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7160   /// mappers, and non-contiguous information.
7161   struct MapCombinedInfoTy {
7162     struct StructNonContiguousInfo {
7163       bool IsNonContiguous = false;
7164       MapDimArrayTy Dims;
7165       MapNonContiguousArrayTy Offsets;
7166       MapNonContiguousArrayTy Counts;
7167       MapNonContiguousArrayTy Strides;
7168     };
7169     MapExprsArrayTy Exprs;
7170     MapBaseValuesArrayTy BasePointers;
7171     MapValuesArrayTy Pointers;
7172     MapValuesArrayTy Sizes;
7173     MapFlagsArrayTy Types;
7174     MapMappersArrayTy Mappers;
7175     StructNonContiguousInfo NonContigInfo;
7176 
7177     /// Append arrays in \a CurInfo.
7178     void append(MapCombinedInfoTy &CurInfo) {
7179       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7180       BasePointers.append(CurInfo.BasePointers.begin(),
7181                           CurInfo.BasePointers.end());
7182       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7183       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7184       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7185       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7186       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7187                                  CurInfo.NonContigInfo.Dims.end());
7188       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7189                                     CurInfo.NonContigInfo.Offsets.end());
7190       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7191                                    CurInfo.NonContigInfo.Counts.end());
7192       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7193                                     CurInfo.NonContigInfo.Strides.end());
7194     }
7195   };
7196 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map data collected for the struct before combining.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: its field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct being mapped.
    Address Base = Address::invalid();
    /// Lower-bound (LB) address of the mapped region.
    Address LB = Address::invalid();
    /// Whether an array section of the struct is being mapped.
    bool IsArraySection = false;
    /// Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7212 
private:
  /// All the information gathered for a single mappable-expression component
  /// list: the map type and its modifiers, plus flags describing how the
  /// entry must be emitted (e.g. whether a device pointer has to be
  /// returned).
  struct MapInfo {
    /// Component list of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type; defaults to OMPC_MAP_unknown (e.g. for motion clauses).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Modifiers from the map clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Modifiers from a motion (to/from) clause.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the mapping is implicit rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper for this entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    // NOTE(review): presumably distinguishes use_device_addr from
    // use_device_ptr handling — confirm against users of this field.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7240 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression the deferred entry was created from.
    const Expr *IE = nullptr;
    /// Declaration the device pointer/address refers to.
    const ValueDecl *VD = nullptr;
    // NOTE(review): presumably true for use_device_addr and false for
    // use_device_ptr — confirm at the construction sites.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7253 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7274 
7275   llvm::Value *getExprTypeSize(const Expr *E) const {
7276     QualType ExprTy = E->getType().getCanonicalType();
7277 
7278     // Calculate the size for array shaping expression.
7279     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7280       llvm::Value *Size =
7281           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7282       for (const Expr *SE : OAE->getDimensions()) {
7283         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7284         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7285                                       CGF.getContext().getSizeType(),
7286                                       SE->getExprLoc());
7287         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7288       }
7289       return Size;
7290     }
7291 
7292     // Reference types are ignored for mapping purposes.
7293     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7294       ExprTy = RefTy->getPointeeType().getCanonicalType();
7295 
7296     // Given that an array section is considered a built-in type, we need to
7297     // do the calculation based on the length of the section instead of relying
7298     // on CGF.getTypeSize(E->getType()).
7299     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7300       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7301                             OAE->getBase()->IgnoreParenImpCasts())
7302                             .getCanonicalType();
7303 
7304       // If there is no length associated with the expression and lower bound is
7305       // not specified too, that means we are using the whole length of the
7306       // base.
7307       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7308           !OAE->getLowerBound())
7309         return CGF.getTypeSize(BaseTy);
7310 
7311       llvm::Value *ElemSize;
7312       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7313         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7314       } else {
7315         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7316         assert(ATy && "Expecting array type if not a pointer type.");
7317         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7318       }
7319 
7320       // If we don't have a length at this point, that is because we have an
7321       // array section with a single element.
7322       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7323         return ElemSize;
7324 
7325       if (const Expr *LenExpr = OAE->getLength()) {
7326         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7327         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7328                                              CGF.getContext().getSizeType(),
7329                                              LenExpr->getExprLoc());
7330         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7331       }
7332       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7333              OAE->getLowerBound() && "expected array_section[lb:].");
7334       // Size = sizetype - lb * elemtype;
7335       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7336       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7337       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7338                                        CGF.getContext().getSizeType(),
7339                                        OAE->getLowerBound()->getExprLoc());
7340       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7341       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7342       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7343       LengthVal = CGF.Builder.CreateSelect(
7344           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7345       return LengthVal;
7346     }
7347     return CGF.getTypeSize(ExprTy);
7348   }
7349 
7350   /// Return the corresponding bits for a given map clause modifier. Add
7351   /// a flag marking the map as a pointer if requested. Add a flag marking the
7352   /// map as the first one of a series of maps that relate to the same map
7353   /// expression.
7354   OpenMPOffloadMappingFlags getMapTypeBits(
7355       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7356       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7357       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7358     OpenMPOffloadMappingFlags Bits =
7359         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7360     switch (MapType) {
7361     case OMPC_MAP_alloc:
7362     case OMPC_MAP_release:
7363       // alloc and release is the default behavior in the runtime library,  i.e.
7364       // if we don't pass any bits alloc/release that is what the runtime is
7365       // going to do. Therefore, we don't need to signal anything for these two
7366       // type modifiers.
7367       break;
7368     case OMPC_MAP_to:
7369       Bits |= OMP_MAP_TO;
7370       break;
7371     case OMPC_MAP_from:
7372       Bits |= OMP_MAP_FROM;
7373       break;
7374     case OMPC_MAP_tofrom:
7375       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7376       break;
7377     case OMPC_MAP_delete:
7378       Bits |= OMP_MAP_DELETE;
7379       break;
7380     case OMPC_MAP_unknown:
7381       llvm_unreachable("Unexpected map type!");
7382     }
7383     if (AddPtrFlag)
7384       Bits |= OMP_MAP_PTR_AND_OBJ;
7385     if (AddIsTargetParamFlag)
7386       Bits |= OMP_MAP_TARGET_PARAM;
7387     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7388         != MapModifiers.end())
7389       Bits |= OMP_MAP_ALWAYS;
7390     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7391         != MapModifiers.end())
7392       Bits |= OMP_MAP_CLOSE;
7393     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7394             MapModifiers.end() ||
7395         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7396             MotionModifiers.end())
7397       Bits |= OMP_MAP_PRESENT;
7398     if (IsNonContiguous)
7399       Bits |= OMP_MAP_NON_CONTIG;
7400     return Bits;
7401   }
7402 
7403   /// Return true if the provided expression is a final array section. A
7404   /// final array section, is one whose length can't be proved to be one.
7405   bool isFinalArraySectionExpression(const Expr *E) const {
7406     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7407 
7408     // It is not an array section and therefore not a unity-size one.
7409     if (!OASE)
7410       return false;
7411 
7412     // An array section with no colon always refer to a single element.
7413     if (OASE->getColonLocFirst().isInvalid())
7414       return false;
7415 
7416     const Expr *Length = OASE->getLength();
7417 
7418     // If we don't have a length we have to check if the array has size 1
7419     // for this dimension. Also, we should always expect a length if the
7420     // base type is pointer.
7421     if (!Length) {
7422       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7423                              OASE->getBase()->IgnoreParenImpCasts())
7424                              .getCanonicalType();
7425       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7426         return ATy->getSize().getSExtValue() != 1;
7427       // If we don't have a constant dimension length, we have to consider
7428       // the current section as having any size, so it is not necessarily
7429       // unitary. If it happen to be unity size, that's user fault.
7430       return true;
7431     }
7432 
7433     // Check if the length evaluates to 1.
7434     Expr::EvalResult Result;
7435     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7436       return true; // Can have more that size 1.
7437 
7438     llvm::APSInt ConstLength = Result.Val.getInt();
7439     return ConstLength.getSExtValue() != 1;
7440   }
7441 
7442   /// Generate the base pointers, section pointers, sizes, map type bits, and
7443   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7444   /// map type, map or motion modifiers, and expression components.
7445   /// \a IsFirstComponent should be set to true if the provided set of
7446   /// components is the first associated with a capture.
7447   void generateInfoForComponentList(
7448       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7449       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7450       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7451       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7452       bool IsFirstComponentList, bool IsImplicit,
7453       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7454       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7455       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7456           OverlappedElements = llvm::None) const {
7457     // The following summarizes what has to be generated for each map and the
7458     // types below. The generated information is expressed in this order:
7459     // base pointer, section pointer, size, flags
7460     // (to add to the ones that come from the map type and modifier).
7461     //
7462     // double d;
7463     // int i[100];
7464     // float *p;
7465     //
7466     // struct S1 {
7467     //   int i;
7468     //   float f[50];
7469     // }
7470     // struct S2 {
7471     //   int i;
7472     //   float f[50];
7473     //   S1 s;
7474     //   double *p;
7475     //   struct S2 *ps;
7476     //   int &ref;
7477     // }
7478     // S2 s;
7479     // S2 *ps;
7480     //
7481     // map(d)
7482     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7483     //
7484     // map(i)
7485     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7486     //
7487     // map(i[1:23])
7488     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7489     //
7490     // map(p)
7491     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7492     //
7493     // map(p[1:24])
7494     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7495     // in unified shared memory mode or for local pointers
7496     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7497     //
7498     // map(s)
7499     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7500     //
7501     // map(s.i)
7502     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7503     //
7504     // map(s.s.f)
7505     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7506     //
7507     // map(s.p)
7508     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7509     //
7510     // map(to: s.p[:22])
7511     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7512     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7513     // &(s.p), &(s.p[0]), 22*sizeof(double),
7514     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7515     // (*) alloc space for struct members, only this is a target parameter
7516     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7517     //      optimizes this entry out, same in the examples below)
7518     // (***) map the pointee (map: to)
7519     //
7520     // map(to: s.ref)
7521     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7522     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7523     // (*) alloc space for struct members, only this is a target parameter
7524     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7525     //      optimizes this entry out, same in the examples below)
7526     // (***) map the pointee (map: to)
7527     //
7528     // map(s.ps)
7529     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7530     //
7531     // map(from: s.ps->s.i)
7532     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7533     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7534     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7535     //
7536     // map(to: s.ps->ps)
7537     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7538     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7539     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7540     //
7541     // map(s.ps->ps->ps)
7542     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7543     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7544     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7545     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7546     //
7547     // map(to: s.ps->ps->s.f[:22])
7548     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7549     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7550     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7551     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7552     //
7553     // map(ps)
7554     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7555     //
7556     // map(ps->i)
7557     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7558     //
7559     // map(ps->s.f)
7560     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7561     //
7562     // map(from: ps->p)
7563     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7564     //
7565     // map(to: ps->p[:22])
7566     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7567     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7568     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7569     //
7570     // map(ps->ps)
7571     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7572     //
7573     // map(from: ps->ps->s.i)
7574     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7575     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7576     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7577     //
7578     // map(from: ps->ps->ps)
7579     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7580     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7581     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7582     //
7583     // map(ps->ps->ps->ps)
7584     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7585     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7586     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7587     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7588     //
7589     // map(to: ps->ps->ps->s.f[:22])
7590     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7591     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7592     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7593     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7594     //
7595     // map(to: s.f[:22]) map(from: s.p[:33])
7596     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7597     //     sizeof(double*) (**), TARGET_PARAM
7598     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7599     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7600     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7601     // (*) allocate contiguous space needed to fit all mapped members even if
7602     //     we allocate space for members not mapped (in this example,
7603     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7604     //     them as well because they fall between &s.f[0] and &s.p)
7605     //
7606     // map(from: s.f[:22]) map(to: ps->p[:33])
7607     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7608     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7609     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7610     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7611     // (*) the struct this entry pertains to is the 2nd element in the list of
7612     //     arguments, hence MEMBER_OF(2)
7613     //
7614     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7615     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7616     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7617     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7618     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7619     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7620     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7621     // (*) the struct this entry pertains to is the 4th element in the list
7622     //     of arguments, hence MEMBER_OF(4)
7623 
7624     // Track if the map information being generated is the first for a capture.
7625     bool IsCaptureFirstInfo = IsFirstComponentList;
7626     // When the variable is on a declare target link or in a to clause with
7627     // unified memory, a reference is needed to hold the host/device address
7628     // of the variable.
7629     bool RequiresReference = false;
7630 
7631     // Scan the components from the base to the complete expression.
7632     auto CI = Components.rbegin();
7633     auto CE = Components.rend();
7634     auto I = CI;
7635 
7636     // Track if the map information being generated is the first for a list of
7637     // components.
7638     bool IsExpressionFirstInfo = true;
7639     bool FirstPointerInComplexData = false;
7640     Address BP = Address::invalid();
7641     const Expr *AssocExpr = I->getAssociatedExpression();
7642     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7643     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7644     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7645 
7646     if (isa<MemberExpr>(AssocExpr)) {
7647       // The base is the 'this' pointer. The content of the pointer is going
7648       // to be the base of the field being mapped.
7649       BP = CGF.LoadCXXThisAddress();
7650     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7651                (OASE &&
7652                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7653       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7654     } else if (OAShE &&
7655                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7656       BP = Address(
7657           CGF.EmitScalarExpr(OAShE->getBase()),
7658           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7659     } else {
7660       // The base is the reference to the variable.
7661       // BP = &Var.
7662       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7663       if (const auto *VD =
7664               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7665         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7666                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7667           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7668               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7669                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7670             RequiresReference = true;
7671             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7672           }
7673         }
7674       }
7675 
7676       // If the variable is a pointer and is being dereferenced (i.e. is not
7677       // the last component), the base has to be the pointer itself, not its
7678       // reference. References are ignored for mapping purposes.
7679       QualType Ty =
7680           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7681       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7682         // No need to generate individual map information for the pointer, it
7683         // can be associated with the combined storage if shared memory mode is
7684         // active or the base declaration is not global variable.
7685         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7686         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7687             !VD || VD->hasLocalStorage())
7688           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7689         else
7690           FirstPointerInComplexData = true;
7691         ++I;
7692       }
7693     }
7694 
7695     // Track whether a component of the list should be marked as MEMBER_OF some
7696     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7697     // in a component list should be marked as MEMBER_OF, all subsequent entries
7698     // do not belong to the base struct. E.g.
7699     // struct S2 s;
7700     // s.ps->ps->ps->f[:]
7701     //   (1) (2) (3) (4)
7702     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7703     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7704     // is the pointee of ps(2) which is not member of struct s, so it should not
7705     // be marked as such (it is still PTR_AND_OBJ).
7706     // The variable is initialized to false so that PTR_AND_OBJ entries which
7707     // are not struct members are not considered (e.g. array of pointers to
7708     // data).
7709     bool ShouldBeMemberOf = false;
7710 
7711     // Variable keeping track of whether or not we have encountered a component
7712     // in the component list which is a member expression. Useful when we have a
7713     // pointer or a final array section, in which case it is the previous
7714     // component in the list which tells us whether we have a member expression.
7715     // E.g. X.f[:]
7716     // While processing the final array section "[:]" it is "f" which tells us
7717     // whether we are dealing with a member of a declared struct.
7718     const MemberExpr *EncounteredME = nullptr;
7719 
7720     // Track for the total number of dimension. Start from one for the dummy
7721     // dimension.
7722     uint64_t DimSize = 1;
7723 
7724     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7725     bool IsPrevMemberReference = false;
7726 
7727     for (; I != CE; ++I) {
7728       // If the current component is member of a struct (parent struct) mark it.
7729       if (!EncounteredME) {
7730         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7731         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7732         // as MEMBER_OF the parent struct.
7733         if (EncounteredME) {
7734           ShouldBeMemberOf = true;
7735           // Do not emit as complex pointer if this is actually not array-like
7736           // expression.
7737           if (FirstPointerInComplexData) {
7738             QualType Ty = std::prev(I)
7739                               ->getAssociatedDeclaration()
7740                               ->getType()
7741                               .getNonReferenceType();
7742             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7743             FirstPointerInComplexData = false;
7744           }
7745         }
7746       }
7747 
7748       auto Next = std::next(I);
7749 
7750       // We need to generate the addresses and sizes if this is the last
7751       // component, if the component is a pointer or if it is an array section
7752       // whose length can't be proved to be one. If this is a pointer, it
7753       // becomes the base address for the following components.
7754 
7755       // A final array section, is one whose length can't be proved to be one.
7756       // If the map item is non-contiguous then we don't treat any array section
7757       // as final array section.
7758       bool IsFinalArraySection =
7759           !IsNonContiguous &&
7760           isFinalArraySectionExpression(I->getAssociatedExpression());
7761 
7762       // If we have a declaration for the mapping use that, otherwise use
7763       // the base declaration of the map clause.
7764       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7765                                      ? I->getAssociatedDeclaration()
7766                                      : BaseDecl;
7767 
7768       // Get information on whether the element is a pointer. Have to do a
7769       // special treatment for array sections given that they are built-in
7770       // types.
7771       const auto *OASE =
7772           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7773       const auto *OAShE =
7774           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7775       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7776       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7777       bool IsPointer =
7778           OAShE ||
7779           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7780                        .getCanonicalType()
7781                        ->isAnyPointerType()) ||
7782           I->getAssociatedExpression()->getType()->isAnyPointerType();
7783       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7784                                MapDecl &&
7785                                MapDecl->getType()->isLValueReferenceType();
7786       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7787 
7788       if (OASE)
7789         ++DimSize;
7790 
7791       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7792           IsFinalArraySection) {
7793         // If this is not the last component, we expect the pointer to be
7794         // associated with an array expression or member expression.
7795         assert((Next == CE ||
7796                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7797                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7798                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7799                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7800                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7801                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7802                "Unexpected expression");
7803 
7804         Address LB = Address::invalid();
7805         Address LowestElem = Address::invalid();
7806         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7807                                        const MemberExpr *E) {
7808           const Expr *BaseExpr = E->getBase();
7809           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7810           // scalar.
7811           LValue BaseLV;
7812           if (E->isArrow()) {
7813             LValueBaseInfo BaseInfo;
7814             TBAAAccessInfo TBAAInfo;
7815             Address Addr =
7816                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7817             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7818             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7819           } else {
7820             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7821           }
7822           return BaseLV;
7823         };
7824         if (OAShE) {
7825           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7826                                     CGF.getContext().getTypeAlignInChars(
7827                                         OAShE->getBase()->getType()));
7828         } else if (IsMemberReference) {
7829           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7830           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7831           LowestElem = CGF.EmitLValueForFieldInitialization(
7832                               BaseLVal, cast<FieldDecl>(MapDecl))
7833                            .getAddress(CGF);
7834           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7835                    .getAddress(CGF);
7836         } else {
7837           LowestElem = LB =
7838               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7839                   .getAddress(CGF);
7840         }
7841 
7842         // If this component is a pointer inside the base struct then we don't
7843         // need to create any entry for it - it will be combined with the object
7844         // it is pointing to into a single PTR_AND_OBJ entry.
7845         bool IsMemberPointerOrAddr =
7846             EncounteredME &&
7847             (((IsPointer || ForDeviceAddr) &&
7848               I->getAssociatedExpression() == EncounteredME) ||
7849              (IsPrevMemberReference && !IsPointer) ||
7850              (IsMemberReference && Next != CE &&
7851               !Next->getAssociatedExpression()->getType()->isPointerType()));
7852         if (!OverlappedElements.empty() && Next == CE) {
7853           // Handle base element with the info for overlapped elements.
7854           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7855           assert(!IsPointer &&
7856                  "Unexpected base element with the pointer type.");
7857           // Mark the whole struct as the struct that requires allocation on the
7858           // device.
7859           PartialStruct.LowestElem = {0, LowestElem};
7860           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7861               I->getAssociatedExpression()->getType());
7862           Address HB = CGF.Builder.CreateConstGEP(
7863               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7864                                                               CGF.VoidPtrTy),
7865               TypeSize.getQuantity() - 1);
7866           PartialStruct.HighestElem = {
7867               std::numeric_limits<decltype(
7868                   PartialStruct.HighestElem.first)>::max(),
7869               HB};
7870           PartialStruct.Base = BP;
7871           PartialStruct.LB = LB;
7872           assert(
7873               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7874               "Overlapped elements must be used only once for the variable.");
7875           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7876           // Emit data for non-overlapped data.
7877           OpenMPOffloadMappingFlags Flags =
7878               OMP_MAP_MEMBER_OF |
7879               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7880                              /*AddPtrFlag=*/false,
7881                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7882           llvm::Value *Size = nullptr;
7883           // Do bitcopy of all non-overlapped structure elements.
7884           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7885                    Component : OverlappedElements) {
7886             Address ComponentLB = Address::invalid();
7887             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7888                  Component) {
7889               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7890                 const auto *FD = dyn_cast<FieldDecl>(VD);
7891                 if (FD && FD->getType()->isLValueReferenceType()) {
7892                   const auto *ME =
7893                       cast<MemberExpr>(MC.getAssociatedExpression());
7894                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7895                   ComponentLB =
7896                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7897                           .getAddress(CGF);
7898                 } else {
7899                   ComponentLB =
7900                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7901                           .getAddress(CGF);
7902                 }
7903                 Size = CGF.Builder.CreatePtrDiff(
7904                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7905                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7906                 break;
7907               }
7908             }
7909             assert(Size && "Failed to determine structure size");
7910             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7911             CombinedInfo.BasePointers.push_back(BP.getPointer());
7912             CombinedInfo.Pointers.push_back(LB.getPointer());
7913             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7914                 Size, CGF.Int64Ty, /*isSigned=*/true));
7915             CombinedInfo.Types.push_back(Flags);
7916             CombinedInfo.Mappers.push_back(nullptr);
7917             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7918                                                                       : 1);
7919             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7920           }
7921           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7922           CombinedInfo.BasePointers.push_back(BP.getPointer());
7923           CombinedInfo.Pointers.push_back(LB.getPointer());
7924           Size = CGF.Builder.CreatePtrDiff(
7925               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7926               CGF.EmitCastToVoidPtr(LB.getPointer()));
7927           CombinedInfo.Sizes.push_back(
7928               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7929           CombinedInfo.Types.push_back(Flags);
7930           CombinedInfo.Mappers.push_back(nullptr);
7931           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7932                                                                     : 1);
7933           break;
7934         }
7935         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7936         if (!IsMemberPointerOrAddr ||
7937             (Next == CE && MapType != OMPC_MAP_unknown)) {
7938           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7939           CombinedInfo.BasePointers.push_back(BP.getPointer());
7940           CombinedInfo.Pointers.push_back(LB.getPointer());
7941           CombinedInfo.Sizes.push_back(
7942               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7943           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7944                                                                     : 1);
7945 
7946           // If Mapper is valid, the last component inherits the mapper.
7947           bool HasMapper = Mapper && Next == CE;
7948           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7949 
7950           // We need to add a pointer flag for each map that comes from the
7951           // same expression except for the first one. We also need to signal
7952           // this map is the first one that relates with the current capture
7953           // (there is a set of entries for each capture).
7954           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7955               MapType, MapModifiers, MotionModifiers, IsImplicit,
7956               !IsExpressionFirstInfo || RequiresReference ||
7957                   FirstPointerInComplexData || IsMemberReference,
7958               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7959 
7960           if (!IsExpressionFirstInfo || IsMemberReference) {
7961             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7962             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7963             if (IsPointer || (IsMemberReference && Next != CE))
7964               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7965                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7966 
7967             if (ShouldBeMemberOf) {
7968               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7969               // should be later updated with the correct value of MEMBER_OF.
7970               Flags |= OMP_MAP_MEMBER_OF;
7971               // From now on, all subsequent PTR_AND_OBJ entries should not be
7972               // marked as MEMBER_OF.
7973               ShouldBeMemberOf = false;
7974             }
7975           }
7976 
7977           CombinedInfo.Types.push_back(Flags);
7978         }
7979 
7980         // If we have encountered a member expression so far, keep track of the
7981         // mapped member. If the parent is "*this", then the value declaration
7982         // is nullptr.
7983         if (EncounteredME) {
7984           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7985           unsigned FieldIndex = FD->getFieldIndex();
7986 
7987           // Update info about the lowest and highest elements for this struct
7988           if (!PartialStruct.Base.isValid()) {
7989             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7990             if (IsFinalArraySection) {
7991               Address HB =
7992                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7993                       .getAddress(CGF);
7994               PartialStruct.HighestElem = {FieldIndex, HB};
7995             } else {
7996               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7997             }
7998             PartialStruct.Base = BP;
7999             PartialStruct.LB = BP;
8000           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8001             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8002           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8003             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8004           }
8005         }
8006 
8007         // Need to emit combined struct for array sections.
8008         if (IsFinalArraySection || IsNonContiguous)
8009           PartialStruct.IsArraySection = true;
8010 
8011         // If we have a final array section, we are done with this expression.
8012         if (IsFinalArraySection)
8013           break;
8014 
8015         // The pointer becomes the base for the next element.
8016         if (Next != CE)
8017           BP = IsMemberReference ? LowestElem : LB;
8018 
8019         IsExpressionFirstInfo = false;
8020         IsCaptureFirstInfo = false;
8021         FirstPointerInComplexData = false;
8022         IsPrevMemberReference = IsMemberReference;
8023       } else if (FirstPointerInComplexData) {
8024         QualType Ty = Components.rbegin()
8025                           ->getAssociatedDeclaration()
8026                           ->getType()
8027                           .getNonReferenceType();
8028         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8029         FirstPointerInComplexData = false;
8030       }
8031     }
8032     // If ran into the whole component - allocate the space for the whole
8033     // record.
8034     if (!EncounteredME)
8035       PartialStruct.HasCompleteRecord = true;
8036 
8037     if (!IsNonContiguous)
8038       return;
8039 
8040     const ASTContext &Context = CGF.getContext();
8041 
8042     // For supporting stride in array section, we need to initialize the first
8043     // dimension size as 1, first offset as 0, and first count as 1
8044     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8045     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8046     MapValuesArrayTy CurStrides;
8047     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8048     uint64_t ElementTypeSize;
8049 
8050     // Collect Size information for each dimension and get the element size as
8051     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8052     // should be [10, 10] and the first stride is 4 btyes.
8053     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8054          Components) {
8055       const Expr *AssocExpr = Component.getAssociatedExpression();
8056       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8057 
8058       if (!OASE)
8059         continue;
8060 
8061       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8062       auto *CAT = Context.getAsConstantArrayType(Ty);
8063       auto *VAT = Context.getAsVariableArrayType(Ty);
8064 
8065       // We need all the dimension size except for the last dimension.
8066       assert((VAT || CAT || &Component == &*Components.begin()) &&
8067              "Should be either ConstantArray or VariableArray if not the "
8068              "first Component");
8069 
8070       // Get element size if CurStrides is empty.
8071       if (CurStrides.empty()) {
8072         const Type *ElementType = nullptr;
8073         if (CAT)
8074           ElementType = CAT->getElementType().getTypePtr();
8075         else if (VAT)
8076           ElementType = VAT->getElementType().getTypePtr();
8077         else
8078           assert(&Component == &*Components.begin() &&
8079                  "Only expect pointer (non CAT or VAT) when this is the "
8080                  "first Component");
8081         // If ElementType is null, then it means the base is a pointer
8082         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8083         // for next iteration.
8084         if (ElementType) {
8085           // For the case that having pointer as base, we need to remove one
8086           // level of indirection.
8087           if (&Component != &*Components.begin())
8088             ElementType = ElementType->getPointeeOrArrayElementType();
8089           ElementTypeSize =
8090               Context.getTypeSizeInChars(ElementType).getQuantity();
8091           CurStrides.push_back(
8092               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8093         }
8094       }
8095       // Get dimension value except for the last dimension since we don't need
8096       // it.
8097       if (DimSizes.size() < Components.size() - 1) {
8098         if (CAT)
8099           DimSizes.push_back(llvm::ConstantInt::get(
8100               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8101         else if (VAT)
8102           DimSizes.push_back(CGF.Builder.CreateIntCast(
8103               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8104               /*IsSigned=*/false));
8105       }
8106     }
8107 
8108     // Skip the dummy dimension since we have already have its information.
8109     auto DI = DimSizes.begin() + 1;
8110     // Product of dimension.
8111     llvm::Value *DimProd =
8112         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8113 
8114     // Collect info for non-contiguous. Notice that offset, count, and stride
8115     // are only meaningful for array-section, so we insert a null for anything
8116     // other than array-section.
8117     // Also, the size of offset, count, and stride are not the same as
8118     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8119     // count, and stride are the same as the number of non-contiguous
8120     // declaration in target update to/from clause.
8121     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8122          Components) {
8123       const Expr *AssocExpr = Component.getAssociatedExpression();
8124 
8125       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8126         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8127             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8128             /*isSigned=*/false);
8129         CurOffsets.push_back(Offset);
8130         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8131         CurStrides.push_back(CurStrides.back());
8132         continue;
8133       }
8134 
8135       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8136 
8137       if (!OASE)
8138         continue;
8139 
8140       // Offset
8141       const Expr *OffsetExpr = OASE->getLowerBound();
8142       llvm::Value *Offset = nullptr;
8143       if (!OffsetExpr) {
8144         // If offset is absent, then we just set it to zero.
8145         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8146       } else {
8147         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8148                                            CGF.Int64Ty,
8149                                            /*isSigned=*/false);
8150       }
8151       CurOffsets.push_back(Offset);
8152 
8153       // Count
8154       const Expr *CountExpr = OASE->getLength();
8155       llvm::Value *Count = nullptr;
8156       if (!CountExpr) {
8157         // In Clang, once a high dimension is an array section, we construct all
8158         // the lower dimension as array section, however, for case like
8159         // arr[0:2][2], Clang construct the inner dimension as an array section
8160         // but it actually is not in an array section form according to spec.
8161         if (!OASE->getColonLocFirst().isValid() &&
8162             !OASE->getColonLocSecond().isValid()) {
8163           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8164         } else {
8165           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8166           // When the length is absent it defaults to ⌈(size −
8167           // lower-bound)/stride⌉, where size is the size of the array
8168           // dimension.
8169           const Expr *StrideExpr = OASE->getStride();
8170           llvm::Value *Stride =
8171               StrideExpr
8172                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8173                                               CGF.Int64Ty, /*isSigned=*/false)
8174                   : nullptr;
8175           if (Stride)
8176             Count = CGF.Builder.CreateUDiv(
8177                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8178           else
8179             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8180         }
8181       } else {
8182         Count = CGF.EmitScalarExpr(CountExpr);
8183       }
8184       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8185       CurCounts.push_back(Count);
8186 
8187       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8188       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8189       //              Offset      Count     Stride
8190       //    D0          0           1         4    (int)    <- dummy dimension
8191       //    D1          0           2         8    (2 * (1) * 4)
8192       //    D2          1           2         20   (1 * (1 * 5) * 4)
8193       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8194       const Expr *StrideExpr = OASE->getStride();
8195       llvm::Value *Stride =
8196           StrideExpr
8197               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8198                                           CGF.Int64Ty, /*isSigned=*/false)
8199               : nullptr;
8200       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8201       if (Stride)
8202         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8203       else
8204         CurStrides.push_back(DimProd);
8205       if (DI != DimSizes.end())
8206         ++DI;
8207     }
8208 
8209     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8210     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8211     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8212   }
8213 
8214   /// Return the adjusted map modifiers if the declaration a capture refers to
8215   /// appears in a first-private clause. This is expected to be used only with
8216   /// directives that start with 'target'.
8217   MappableExprsHandler::OpenMPOffloadMappingFlags
8218   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8219     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8220 
8221     // A first private variable captured by reference will use only the
8222     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8223     // declaration is known as first-private in this handler.
8224     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8225       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8226           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8227         return MappableExprsHandler::OMP_MAP_ALWAYS |
8228                MappableExprsHandler::OMP_MAP_TO;
8229       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8230         return MappableExprsHandler::OMP_MAP_TO |
8231                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8232       return MappableExprsHandler::OMP_MAP_PRIVATE |
8233              MappableExprsHandler::OMP_MAP_TO;
8234     }
8235     return MappableExprsHandler::OMP_MAP_TO |
8236            MappableExprsHandler::OMP_MAP_FROM;
8237   }
8238 
8239   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8240     // Rotate by getFlagMemberOffset() bits.
8241     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8242                                                   << getFlagMemberOffset());
8243   }
8244 
8245   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8246                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8247     // If the entry is PTR_AND_OBJ but has not been marked with the special
8248     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8249     // marked as MEMBER_OF.
8250     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8251         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8252       return;
8253 
8254     // Reset the placeholder value to prepare the flag for the assignment of the
8255     // proper MEMBER_OF value.
8256     Flags &= ~OMP_MAP_MEMBER_OF;
8257     Flags |= MemberOfFlag;
8258   }
8259 
8260   void getPlainLayout(const CXXRecordDecl *RD,
8261                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8262                       bool AsBase) const {
8263     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8264 
8265     llvm::StructType *St =
8266         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8267 
8268     unsigned NumElements = St->getNumElements();
8269     llvm::SmallVector<
8270         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8271         RecordLayout(NumElements);
8272 
8273     // Fill bases.
8274     for (const auto &I : RD->bases()) {
8275       if (I.isVirtual())
8276         continue;
8277       const auto *Base = I.getType()->getAsCXXRecordDecl();
8278       // Ignore empty bases.
8279       if (Base->isEmpty() || CGF.getContext()
8280                                  .getASTRecordLayout(Base)
8281                                  .getNonVirtualSize()
8282                                  .isZero())
8283         continue;
8284 
8285       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8286       RecordLayout[FieldIndex] = Base;
8287     }
8288     // Fill in virtual bases.
8289     for (const auto &I : RD->vbases()) {
8290       const auto *Base = I.getType()->getAsCXXRecordDecl();
8291       // Ignore empty bases.
8292       if (Base->isEmpty())
8293         continue;
8294       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8295       if (RecordLayout[FieldIndex])
8296         continue;
8297       RecordLayout[FieldIndex] = Base;
8298     }
8299     // Fill in all the fields.
8300     assert(!RD->isUnion() && "Unexpected union.");
8301     for (const auto *Field : RD->fields()) {
8302       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8303       // will fill in later.)
8304       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8305         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8306         RecordLayout[FieldIndex] = Field;
8307       }
8308     }
8309     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8310              &Data : RecordLayout) {
8311       if (Data.isNull())
8312         continue;
8313       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8314         getPlainLayout(Base, Layout, /*AsBase=*/true);
8315       else
8316         Layout.push_back(Data.get<const FieldDecl *>());
8317     }
8318   }
8319 
8320   /// Generate all the base pointers, section pointers, sizes, map types, and
8321   /// mappers for the extracted mappable expressions (all included in \a
8322   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8323   /// pair of the relevant declaration and index where it occurs is appended to
8324   /// the device pointers info array.
8325   void generateAllInfoForClauses(
8326       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8327       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8328           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8329     // We have to process the component lists that relate with the same
8330     // declaration in a single chunk so that we can generate the map flags
8331     // correctly. Therefore, we organize all lists in a map.
8332     enum MapKind { Present, Allocs, Other, Total };
8333     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8334                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8335         Info;
8336 
8337     // Helper function to fill the information map for the different supported
8338     // clauses.
8339     auto &&InfoGen =
8340         [&Info, &SkipVarSet](
8341             const ValueDecl *D, MapKind Kind,
8342             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8343             OpenMPMapClauseKind MapType,
8344             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8345             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8346             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8347             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8348           if (SkipVarSet.contains(D))
8349             return;
8350           auto It = Info.find(D);
8351           if (It == Info.end())
8352             It = Info
8353                      .insert(std::make_pair(
8354                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8355                      .first;
8356           It->second[Kind].emplace_back(
8357               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8358               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8359         };
8360 
8361     for (const auto *Cl : Clauses) {
8362       const auto *C = dyn_cast<OMPMapClause>(Cl);
8363       if (!C)
8364         continue;
8365       MapKind Kind = Other;
8366       if (!C->getMapTypeModifiers().empty() &&
8367           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8368             return K == OMPC_MAP_MODIFIER_present;
8369           }))
8370         Kind = Present;
8371       else if (C->getMapType() == OMPC_MAP_alloc)
8372         Kind = Allocs;
8373       const auto *EI = C->getVarRefs().begin();
8374       for (const auto L : C->component_lists()) {
8375         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8376         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8377                 C->getMapTypeModifiers(), llvm::None,
8378                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8379                 E);
8380         ++EI;
8381       }
8382     }
8383     for (const auto *Cl : Clauses) {
8384       const auto *C = dyn_cast<OMPToClause>(Cl);
8385       if (!C)
8386         continue;
8387       MapKind Kind = Other;
8388       if (!C->getMotionModifiers().empty() &&
8389           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8390             return K == OMPC_MOTION_MODIFIER_present;
8391           }))
8392         Kind = Present;
8393       const auto *EI = C->getVarRefs().begin();
8394       for (const auto L : C->component_lists()) {
8395         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8396                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8397                 C->isImplicit(), std::get<2>(L), *EI);
8398         ++EI;
8399       }
8400     }
8401     for (const auto *Cl : Clauses) {
8402       const auto *C = dyn_cast<OMPFromClause>(Cl);
8403       if (!C)
8404         continue;
8405       MapKind Kind = Other;
8406       if (!C->getMotionModifiers().empty() &&
8407           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8408             return K == OMPC_MOTION_MODIFIER_present;
8409           }))
8410         Kind = Present;
8411       const auto *EI = C->getVarRefs().begin();
8412       for (const auto L : C->component_lists()) {
8413         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8414                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8415                 C->isImplicit(), std::get<2>(L), *EI);
8416         ++EI;
8417       }
8418     }
8419 
8420     // Look at the use_device_ptr clause information and mark the existing map
8421     // entries as such. If there is no map information for an entry in the
8422     // use_device_ptr list, we create one with map type 'alloc' and zero size
8423     // section. It is the user fault if that was not mapped before. If there is
8424     // no map information and the pointer is a struct member, then we defer the
8425     // emission of that entry until the whole struct has been processed.
8426     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8427                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8428         DeferredInfo;
8429     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8430 
8431     for (const auto *Cl : Clauses) {
8432       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8433       if (!C)
8434         continue;
8435       for (const auto L : C->component_lists()) {
8436         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8437             std::get<1>(L);
8438         assert(!Components.empty() &&
8439                "Not expecting empty list of components!");
8440         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8441         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8442         const Expr *IE = Components.back().getAssociatedExpression();
8443         // If the first component is a member expression, we have to look into
8444         // 'this', which maps to null in the map of map information. Otherwise
8445         // look directly for the information.
8446         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8447 
8448         // We potentially have map information for this declaration already.
8449         // Look for the first set of components that refer to it.
8450         if (It != Info.end()) {
8451           bool Found = false;
8452           for (auto &Data : It->second) {
8453             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8454               return MI.Components.back().getAssociatedDeclaration() == VD;
8455             });
8456             // If we found a map entry, signal that the pointer has to be
8457             // returned and move on to the next declaration. Exclude cases where
8458             // the base pointer is mapped as array subscript, array section or
8459             // array shaping. The base address is passed as a pointer to base in
8460             // this case and cannot be used as a base for use_device_ptr list
8461             // item.
8462             if (CI != Data.end()) {
8463               auto PrevCI = std::next(CI->Components.rbegin());
8464               const auto *VarD = dyn_cast<VarDecl>(VD);
8465               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8466                   isa<MemberExpr>(IE) ||
8467                   !VD->getType().getNonReferenceType()->isPointerType() ||
8468                   PrevCI == CI->Components.rend() ||
8469                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8470                   VarD->hasLocalStorage()) {
8471                 CI->ReturnDevicePointer = true;
8472                 Found = true;
8473                 break;
8474               }
8475             }
8476           }
8477           if (Found)
8478             continue;
8479         }
8480 
8481         // We didn't find any match in our map information - generate a zero
8482         // size array section - if the pointer is a struct member we defer this
8483         // action until the whole struct has been processed.
8484         if (isa<MemberExpr>(IE)) {
8485           // Insert the pointer into Info to be processed by
8486           // generateInfoForComponentList. Because it is a member pointer
8487           // without a pointee, no entry will be generated for it, therefore
8488           // we need to generate one after the whole struct has been processed.
8489           // Nonetheless, generateInfoForComponentList must be called to take
8490           // the pointer into account for the calculation of the range of the
8491           // partial struct.
8492           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8493                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8494                   nullptr);
8495           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8496         } else {
8497           llvm::Value *Ptr =
8498               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8499           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8500           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8501           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8502           UseDevicePtrCombinedInfo.Sizes.push_back(
8503               llvm::Constant::getNullValue(CGF.Int64Ty));
8504           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8505           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8506         }
8507       }
8508     }
8509 
8510     // Look at the use_device_addr clause information and mark the existing map
8511     // entries as such. If there is no map information for an entry in the
8512     // use_device_addr list, we create one with map type 'alloc' and zero size
8513     // section. It is the user fault if that was not mapped before. If there is
8514     // no map information and the pointer is a struct member, then we defer the
8515     // emission of that entry until the whole struct has been processed.
8516     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8517     for (const auto *Cl : Clauses) {
8518       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8519       if (!C)
8520         continue;
8521       for (const auto L : C->component_lists()) {
8522         assert(!std::get<1>(L).empty() &&
8523                "Not expecting empty list of components!");
8524         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8525         if (!Processed.insert(VD).second)
8526           continue;
8527         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8528         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8529         // If the first component is a member expression, we have to look into
8530         // 'this', which maps to null in the map of map information. Otherwise
8531         // look directly for the information.
8532         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8533 
8534         // We potentially have map information for this declaration already.
8535         // Look for the first set of components that refer to it.
8536         if (It != Info.end()) {
8537           bool Found = false;
8538           for (auto &Data : It->second) {
8539             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8540               return MI.Components.back().getAssociatedDeclaration() == VD;
8541             });
8542             // If we found a map entry, signal that the pointer has to be
8543             // returned and move on to the next declaration.
8544             if (CI != Data.end()) {
8545               CI->ReturnDevicePointer = true;
8546               Found = true;
8547               break;
8548             }
8549           }
8550           if (Found)
8551             continue;
8552         }
8553 
8554         // We didn't find any match in our map information - generate a zero
8555         // size array section - if the pointer is a struct member we defer this
8556         // action until the whole struct has been processed.
8557         if (isa<MemberExpr>(IE)) {
8558           // Insert the pointer into Info to be processed by
8559           // generateInfoForComponentList. Because it is a member pointer
8560           // without a pointee, no entry will be generated for it, therefore
8561           // we need to generate one after the whole struct has been processed.
8562           // Nonetheless, generateInfoForComponentList must be called to take
8563           // the pointer into account for the calculation of the range of the
8564           // partial struct.
8565           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8566                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8567                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8568           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8569         } else {
8570           llvm::Value *Ptr;
8571           if (IE->isGLValue())
8572             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8573           else
8574             Ptr = CGF.EmitScalarExpr(IE);
8575           CombinedInfo.Exprs.push_back(VD);
8576           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8577           CombinedInfo.Pointers.push_back(Ptr);
8578           CombinedInfo.Sizes.push_back(
8579               llvm::Constant::getNullValue(CGF.Int64Ty));
8580           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8581           CombinedInfo.Mappers.push_back(nullptr);
8582         }
8583       }
8584     }
8585 
8586     for (const auto &Data : Info) {
8587       StructRangeInfoTy PartialStruct;
8588       // Temporary generated information.
8589       MapCombinedInfoTy CurInfo;
8590       const Decl *D = Data.first;
8591       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8592       for (const auto &M : Data.second) {
8593         for (const MapInfo &L : M) {
8594           assert(!L.Components.empty() &&
8595                  "Not expecting declaration with no component lists.");
8596 
8597           // Remember the current base pointer index.
8598           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8599           CurInfo.NonContigInfo.IsNonContiguous =
8600               L.Components.back().isNonContiguous();
8601           generateInfoForComponentList(
8602               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8603               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8604               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8605 
8606           // If this entry relates with a device pointer, set the relevant
8607           // declaration and add the 'return pointer' flag.
8608           if (L.ReturnDevicePointer) {
8609             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8610                    "Unexpected number of mapped base pointers.");
8611 
8612             const ValueDecl *RelevantVD =
8613                 L.Components.back().getAssociatedDeclaration();
8614             assert(RelevantVD &&
8615                    "No relevant declaration related with device pointer??");
8616 
8617             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8618                 RelevantVD);
8619             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8620           }
8621         }
8622       }
8623 
8624       // Append any pending zero-length pointers which are struct members and
8625       // used with use_device_ptr or use_device_addr.
8626       auto CI = DeferredInfo.find(Data.first);
8627       if (CI != DeferredInfo.end()) {
8628         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8629           llvm::Value *BasePtr;
8630           llvm::Value *Ptr;
8631           if (L.ForDeviceAddr) {
8632             if (L.IE->isGLValue())
8633               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8634             else
8635               Ptr = this->CGF.EmitScalarExpr(L.IE);
8636             BasePtr = Ptr;
8637             // Entry is RETURN_PARAM. Also, set the placeholder value
8638             // MEMBER_OF=FFFF so that the entry is later updated with the
8639             // correct value of MEMBER_OF.
8640             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8641           } else {
8642             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8643             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8644                                              L.IE->getExprLoc());
8645             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8646             // placeholder value MEMBER_OF=FFFF so that the entry is later
8647             // updated with the correct value of MEMBER_OF.
8648             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8649                                     OMP_MAP_MEMBER_OF);
8650           }
8651           CurInfo.Exprs.push_back(L.VD);
8652           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8653           CurInfo.Pointers.push_back(Ptr);
8654           CurInfo.Sizes.push_back(
8655               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8656           CurInfo.Mappers.push_back(nullptr);
8657         }
8658       }
8659       // If there is an entry in PartialStruct it means we have a struct with
8660       // individual members mapped. Emit an extra combined entry.
8661       if (PartialStruct.Base.isValid()) {
8662         CurInfo.NonContigInfo.Dims.push_back(0);
8663         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8664       }
8665 
8666       // We need to append the results of this capture to what we already
8667       // have.
8668       CombinedInfo.append(CurInfo);
8669     }
8670     // Append data for use_device_ptr clauses.
8671     CombinedInfo.append(UseDevicePtrCombinedInfo);
8672   }
8673 
8674 public:
  /// Constructor for executable directives. Pre-computes per-declaration state
  /// from the directive's clauses: firstprivate declarations (explicit and
  /// implicit) and is_device_ptr component lists.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    // try_emplace: a declaration that was already recorded keeps its original
    // Implicit flag; later clauses do not overwrite it.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // If the allocator traits are given by a variable, that variable is
        // the implicit firstprivate; otherwise the allocator variable itself
        // (when it is a VarDecl) is.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information: record every component list
    // attached to each declaration in an is_device_ptr clause.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }
8700 
  /// Constructor for the declare mapper directive. Only the directive itself
  /// is recorded; no clause information (firstprivates, device pointers) is
  /// extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8704 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo    Destination for the extra combined entry.
  /// \param CurTypes        Map flags of the entries already generated for the
  ///                        struct members; rewritten in place to be MEMBER_OF
  ///                        the new combined entry.
  /// \param PartialStruct   Range information (base, lowest/highest element)
  ///                        collected while mapping the individual members.
  /// \param VD              Declaration the entry is generated for, if any.
  /// \param NotTargetParams If true the combined entry gets OMP_MAP_NONE
  ///                        instead of OMP_MAP_TARGET_PARAM.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not a struct member and not an array section
    // needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record both bounds start at the record base; the +1 GEP
    // below then makes the size span exactly one whole record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The combined entry is flagged TARGET_PARAM only when generating info
    // for captures (NotTargetParams == false); otherwise it carries no flags.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry now
    // plays that role.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8761 
8762   /// Generate all the base pointers, section pointers, sizes, map types, and
8763   /// mappers for the extracted mappable expressions (all included in \a
8764   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8765   /// pair of the relevant declaration and index where it occurs is appended to
8766   /// the device pointers info array.
8767   void generateAllInfo(
8768       MapCombinedInfoTy &CombinedInfo,
8769       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8770           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8771     assert(CurDir.is<const OMPExecutableDirective *>() &&
8772            "Expect a executable directive");
8773     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8774     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8775   }
8776 
8777   /// Generate all the base pointers, section pointers, sizes, map types, and
8778   /// mappers for the extracted map clauses of user-defined mapper (all included
8779   /// in \a CombinedInfo).
8780   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8781     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8782            "Expect a declare mapper directive");
8783     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8784     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8785   }
8786 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD             The captured declaration; only handled when its
  ///                       (non-reference) type is a lambda closure class.
  /// \param Arg            Value holding the address of the lambda object.
  /// \param CombinedInfo   Receives one PTR_AND_OBJ entry per by-reference
  ///                       capture (and for a captured 'this', if present).
  /// \param LambdaPointers Records, for each emitted field address, the
  ///                       address of the owning lambda object; consumed
  ///                       later by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured value is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer, if the lambda captured it.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need map entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8853 
8854   /// Set correct indices for lambdas captures.
8855   void adjustMemberOfForLambdaCaptures(
8856       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8857       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8858       MapFlagsArrayTy &Types) const {
8859     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8860       // Set correct member_of idx for all implicit lambda captures.
8861       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8862                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8863         continue;
8864       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8865       assert(BasePtr && "Unable to find base lambda address.");
8866       int TgtIdx = -1;
8867       for (unsigned J = I; J > 0; --J) {
8868         unsigned Idx = J - 1;
8869         if (Pointers[Idx] != BasePtr)
8870           continue;
8871         TgtIdx = Idx;
8872         break;
8873       }
8874       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8875       // All other current entries will be MEMBER_OF the combined entry
8876       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8877       // 0xFFFF in the MEMBER_OF field).
8878       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8879       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8880     }
8881   }
8882 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap           The capture to generate map information for.
  /// \param Arg           Value associated with the capture.
  /// \param CombinedInfo  Receives the generated entries.
  /// \param PartialStruct Filled in when individual struct members are
  ///                      mapped; the caller emits the combined entry
  ///                      afterwards (see emitCombinedEntry).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every component list from the map clauses that reference this
    // declaration, along with map type, modifiers, implicitness, mapper, and
    // the clause expression.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable-sort the lists so entries with a 'present' modifier (or alloc
    // map type) move towards the front.
    // NOTE(review): the comparator pairs LHS's modifiers with RHS's map type
    // (and vice versa) when computing HasAllocs/HasAllocsR — this asymmetry
    // looks suspicious; confirm it is intentional.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Walk both lists from the base outwards, stopping at the first
        // component where they diverge.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The shorter list is the base; the longer list is recorded as an
          // overlapped sub-component of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      // Collect the record's fields (flattening bases for C++ records) so
      // overlapped lists can be ordered by field position.
      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order each base's overlapped lists by the first differing field;
      // shorter (prefix) lists compare first.
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (base) records: whichever appears first in
            // the flattened layout is smaller.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9114 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used when a capture has no explicit map clause: 'this' is mapped
  /// to/from with the pointee size, non-pointer by-copy captures are passed
  /// as LITERAL values, pointer by-copy captures get zero size and no flags,
  /// and by-reference captures get flags from
  /// getMapModifiersForPrivateClauses.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      // Size is the size of the object 'this' points at.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // Firstprivate declarations keep the implicitness recorded for them.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate is materialized once as a registered global
        // copy; map the copy instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // NOTE(review): loads through the captured reference so the pointer
          // value itself is passed — confirm against callers.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9202 };
9203 } // anonymous namespace
9204 
9205 static void emitNonContiguousDescriptor(
9206     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9207     CGOpenMPRuntime::TargetDataInfo &Info) {
9208   CodeGenModule &CGM = CGF.CGM;
9209   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9210       &NonContigInfo = CombinedInfo.NonContigInfo;
9211 
9212   // Build an array of struct descriptor_dim and then assign it to
9213   // offload_args.
9214   //
9215   // struct descriptor_dim {
9216   //  uint64_t offset;
9217   //  uint64_t count;
9218   //  uint64_t stride
9219   // };
9220   ASTContext &C = CGF.getContext();
9221   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9222   RecordDecl *RD;
9223   RD = C.buildImplicitRecord("descriptor_dim");
9224   RD->startDefinition();
9225   addFieldToRecordDecl(C, RD, Int64Ty);
9226   addFieldToRecordDecl(C, RD, Int64Ty);
9227   addFieldToRecordDecl(C, RD, Int64Ty);
9228   RD->completeDefinition();
9229   QualType DimTy = C.getRecordType(RD);
9230 
9231   enum { OffsetFD = 0, CountFD, StrideFD };
9232   // We need two index variable here since the size of "Dims" is the same as the
9233   // size of Components, however, the size of offset, count, and stride is equal
9234   // to the size of base declaration that is non-contiguous.
9235   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9236     // Skip emitting ir if dimension size is 1 since it cannot be
9237     // non-contiguous.
9238     if (NonContigInfo.Dims[I] == 1)
9239       continue;
9240     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9241     QualType ArrayTy =
9242         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9243     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9244     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9245       unsigned RevIdx = EE - II - 1;
9246       LValue DimsLVal = CGF.MakeAddrLValue(
9247           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9248       // Offset
9249       LValue OffsetLVal = CGF.EmitLValueForField(
9250           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9251       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9252       // Count
9253       LValue CountLVal = CGF.EmitLValueForField(
9254           DimsLVal, *std::next(RD->field_begin(), CountFD));
9255       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9256       // Stride
9257       LValue StrideLVal = CGF.EmitLValueForField(
9258           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9259       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9260     }
9261     // args[I] = &dims
9262     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9263         DimsAddr, CGM.Int8PtrTy);
9264     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9265         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9266         Info.PointersArray, 0, I);
9267     Address PAddr(P, CGF.getPointerAlign());
9268     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9269     ++L;
9270   }
9271 }
9272 
9273 /// Emit a string constant containing the names of the values mapped to the
9274 /// offloading runtime library.
9275 llvm::Constant *
9276 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9277                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9278   llvm::Constant *SrcLocStr;
9279   if (!MapExprs.getMapDecl()) {
9280     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9281   } else {
9282     std::string ExprName = "";
9283     if (MapExprs.getMapExpr()) {
9284       PrintingPolicy P(CGF.getContext().getLangOpts());
9285       llvm::raw_string_ostream OS(ExprName);
9286       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9287       OS.flush();
9288     } else {
9289       ExprName = MapExprs.getMapDecl()->getNameAsString();
9290     }
9291 
9292     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9293     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9294     const char *FileName = PLoc.getFilename();
9295     unsigned Line = PLoc.getLine();
9296     unsigned Column = PLoc.getColumn();
9297     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9298                                                 Line, Column);
9299   }
9300   return SrcLocStr;
9301 }
9302 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CombinedInfo The base pointers, pointers, sizes, map types/names and
///        user-defined mappers collected for all captured/mapped values.
/// \param Info [out] Populated with the emitted arrays and related flags that
///        later calls pass to the offloading runtime.
/// \param OMPBuilder Used to build source-location strings for the map-names
///        array.
/// \param IsNonContiguous If true, also emit the descriptor arrays required
///        for non-contiguous (strided) array sections.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers, and mappers are always filled at runtime, so
    // they get stack temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the number of
        // dimensions rather than a byte count.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null i8* so the runtime skips name reporting.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Strip OMP_MAP_PRESENT from every entry; remember whether anything
      // actually changed so we only emit the second global when needed.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-capture slots of the base-pointer, pointer, (runtime)
    // size, and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr/addr capture's address was stored so
      // the caller can privatize it later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only a stack-allocated sizes array needs per-element stores; the
      // constant-global case was fully initialized above.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only needed when strided sections were
  // actually recorded and some pointers were emitted.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9496 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// If true, emit the map types intended for the end of the region rather
  /// than for its beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9505 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// All six *ArrayArg parameters are outputs: each receives either a GEP to
/// the first element of the corresponding array recorded in \p Info, or a
/// null pointer when no captures were mapped (Info.NumberOfPtrs == 0).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For an end call, prefer the dedicated end-of-region map types (emitted
    // when a 'present' modifier had to be stripped); otherwise use the common
    // array.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the map-names array argument if debug information is
    // requested; otherwise pass a null pointer to the runtime.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing was mapped: hand the runtime null pointers for every array.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9566 
/// Check for inner distribute directive.
///
/// Returns the 'distribute'-based directive nested inside \p D, if any.  For
/// a plain 'target', the 'distribute' may appear either directly or inside a
/// single intervening 'teams' region; for 'target teams' it must be directly
/// nested.  Other combined target directives never carry a separate nested
/// 'distribute' and yield nullptr; non-target kinds are a caller error.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // NOTE(review): getSingleCompoundChild is invoked through the
  // CGOpenMPSIMDRuntime subclass here; if it is a static member of the base
  // CGOpenMPRuntime, spelling the base class name would be clearer — confirm.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // Directly nested 'distribute'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // Otherwise, look one level deeper through a single 'teams' region.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These target forms cannot contain a nested 'distribute'.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // The remaining kinds either already embed 'distribute' as part of a
    // combined directive or are not target directives at all; callers must
    // not pass them here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9674 
9675 /// Emit the user-defined mapper function. The code generation follows the
9676 /// pattern in the example below.
9677 /// \code
9678 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9679 ///                                           void *base, void *begin,
9680 ///                                           int64_t size, int64_t type,
9681 ///                                           void *name = nullptr) {
9682 ///   // Allocate space for an array section first or add a base/begin for
9683 ///   // pointer dereference.
9684 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9685 ///       !maptype.IsDelete)
9686 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9687 ///                                 size*sizeof(Ty), clearToFromMember(type));
9688 ///   // Map members.
9689 ///   for (unsigned i = 0; i < size; i++) {
9690 ///     // For each component specified by this mapper:
9691 ///     for (auto c : begin[i]->all_components) {
9692 ///       if (c.hasMapper())
9693 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9694 ///                       c.arg_type, c.arg_name);
9695 ///       else
9696 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9697 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9698 ///                                     c.arg_name);
9699 ///     }
9700 ///   }
9701 ///   // Delete the array section.
9702 ///   if (size > 1 && maptype.IsDelete)
9703 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9704 ///                                 size*sizeof(Ty), clearToFromMember(type));
9705 /// }
9706 /// \endcode
9707 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9708                                             CodeGenFunction *CGF) {
9709   if (UDMMap.count(D) > 0)
9710     return;
9711   ASTContext &C = CGM.getContext();
9712   QualType Ty = D->getType();
9713   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9714   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9715   auto *MapperVarDecl =
9716       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9717   SourceLocation Loc = D->getLocation();
9718   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9719 
9720   // Prepare mapper function arguments and attributes.
9721   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9722                               C.VoidPtrTy, ImplicitParamDecl::Other);
9723   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9724                             ImplicitParamDecl::Other);
9725   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9726                              C.VoidPtrTy, ImplicitParamDecl::Other);
9727   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9728                             ImplicitParamDecl::Other);
9729   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9730                             ImplicitParamDecl::Other);
9731   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9732                             ImplicitParamDecl::Other);
9733   FunctionArgList Args;
9734   Args.push_back(&HandleArg);
9735   Args.push_back(&BaseArg);
9736   Args.push_back(&BeginArg);
9737   Args.push_back(&SizeArg);
9738   Args.push_back(&TypeArg);
9739   Args.push_back(&NameArg);
9740   const CGFunctionInfo &FnInfo =
9741       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9742   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9743   SmallString<64> TyStr;
9744   llvm::raw_svector_ostream Out(TyStr);
9745   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9746   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9747   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9748                                     Name, &CGM.getModule());
9749   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9750   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9751   // Start the mapper function code generation.
9752   CodeGenFunction MapperCGF(CGM);
9753   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9754   // Compute the starting and end addresses of array elements.
9755   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9756       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9757       C.getPointerType(Int64Ty), Loc);
9758   // Prepare common arguments for array initiation and deletion.
9759   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9760       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9761       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9762   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9763       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9764       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9765   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9766       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9767       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9768   // Convert the size in bytes into the number of array elements.
9769   Size = MapperCGF.Builder.CreateExactUDiv(
9770       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9771   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9772       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9773   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9774   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9775       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9776       C.getPointerType(Int64Ty), Loc);
9777   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9778       MapperCGF.GetAddrOfLocalVar(&NameArg),
9779       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9780 
9781   // Emit array initiation if this is an array section and \p MapType indicates
9782   // that memory allocation is required.
9783   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9784   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9785                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9786 
9787   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9788 
9789   // Emit the loop header block.
9790   MapperCGF.EmitBlock(HeadBB);
9791   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9792   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9793   // Evaluate whether the initial condition is satisfied.
9794   llvm::Value *IsEmpty =
9795       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9796   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9797   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9798 
9799   // Emit the loop body block.
9800   MapperCGF.EmitBlock(BodyBB);
9801   llvm::BasicBlock *LastBB = BodyBB;
9802   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9803       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9804   PtrPHI->addIncoming(PtrBegin, EntryBB);
9805   Address PtrCurrent =
9806       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9807                           .getAlignment()
9808                           .alignmentOfArrayElement(ElementSize));
9809   // Privatize the declared variable of mapper to be the current array element.
9810   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9811   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9812   (void)Scope.Privatize();
9813 
9814   // Get map clause information. Fill up the arrays with all mapped variables.
9815   MappableExprsHandler::MapCombinedInfoTy Info;
9816   MappableExprsHandler MEHandler(*D, MapperCGF);
9817   MEHandler.generateAllInfoForMapper(Info);
9818 
9819   // Call the runtime API __tgt_mapper_num_components to get the number of
9820   // pre-existing components.
9821   llvm::Value *OffloadingArgs[] = {Handle};
9822   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9823       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9824                                             OMPRTL___tgt_mapper_num_components),
9825       OffloadingArgs);
9826   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9827       PreviousSize,
9828       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9829 
9830   // Fill up the runtime mapper handle for all components.
9831   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9832     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9833         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9834     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9835         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9836     llvm::Value *CurSizeArg = Info.Sizes[I];
9837     llvm::Value *CurNameArg =
9838         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9839             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9840             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9841 
9842     // Extract the MEMBER_OF field from the map type.
9843     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9844     llvm::Value *MemberMapType =
9845         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9846 
9847     // Combine the map type inherited from user-defined mapper with that
9848     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9849     // bits of the \a MapType, which is the input argument of the mapper
9850     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9851     // bits of MemberMapType.
9852     // [OpenMP 5.0], 1.2.6. map-type decay.
9853     //        | alloc |  to   | from  | tofrom | release | delete
9854     // ----------------------------------------------------------
9855     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9856     // to     | alloc |  to   | alloc |   to   | release | delete
9857     // from   | alloc | alloc | from  |  from  | release | delete
9858     // tofrom | alloc |  to   | from  | tofrom | release | delete
9859     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9860         MapType,
9861         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9862                                    MappableExprsHandler::OMP_MAP_FROM));
9863     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9864     llvm::BasicBlock *AllocElseBB =
9865         MapperCGF.createBasicBlock("omp.type.alloc.else");
9866     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9867     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9868     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9869     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9870     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9871     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9872     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9873     MapperCGF.EmitBlock(AllocBB);
9874     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9875         MemberMapType,
9876         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9877                                      MappableExprsHandler::OMP_MAP_FROM)));
9878     MapperCGF.Builder.CreateBr(EndBB);
9879     MapperCGF.EmitBlock(AllocElseBB);
9880     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9881         LeftToFrom,
9882         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9883     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9884     // In case of to, clear OMP_MAP_FROM.
9885     MapperCGF.EmitBlock(ToBB);
9886     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9887         MemberMapType,
9888         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9889     MapperCGF.Builder.CreateBr(EndBB);
9890     MapperCGF.EmitBlock(ToElseBB);
9891     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9892         LeftToFrom,
9893         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9894     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9895     // In case of from, clear OMP_MAP_TO.
9896     MapperCGF.EmitBlock(FromBB);
9897     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9898         MemberMapType,
9899         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9900     // In case of tofrom, do nothing.
9901     MapperCGF.EmitBlock(EndBB);
9902     LastBB = EndBB;
9903     llvm::PHINode *CurMapType =
9904         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9905     CurMapType->addIncoming(AllocMapType, AllocBB);
9906     CurMapType->addIncoming(ToMapType, ToBB);
9907     CurMapType->addIncoming(FromMapType, FromBB);
9908     CurMapType->addIncoming(MemberMapType, ToElseBB);
9909 
9910     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9911                                      CurSizeArg, CurMapType, CurNameArg};
9912     if (Info.Mappers[I]) {
9913       // Call the corresponding mapper function.
9914       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9915           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9916       assert(MapperFunc && "Expect a valid mapper function is available.");
9917       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9918     } else {
9919       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9920       // data structure.
9921       MapperCGF.EmitRuntimeCall(
9922           OMPBuilder.getOrCreateRuntimeFunction(
9923               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9924           OffloadingArgs);
9925     }
9926   }
9927 
9928   // Update the pointer to point to the next element that needs to be mapped,
9929   // and check whether we have mapped all elements.
9930   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9931       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9932   PtrPHI->addIncoming(PtrNext, LastBB);
9933   llvm::Value *IsDone =
9934       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9935   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9936   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9937 
9938   MapperCGF.EmitBlock(ExitBB);
9939   // Emit array deletion if this is an array section and \p MapType indicates
9940   // that deletion is required.
9941   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9942                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9943 
9944   // Emit the function exit block.
9945   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9946   MapperCGF.FinishFunction();
9947   UDMMap.try_emplace(D, Fn);
9948   if (CGF) {
9949     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9950     Decls.second.push_back(D);
9951   }
9952 }
9953 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Name fragment used for the generated basic blocks / values so the IR is
  // readable ("omp.array.init" vs "omp.array.del").
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section, i.e. more than one element is
  // mapped (Size > 1).
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit of the incoming map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // For initialization, also emit the runtime call when a pointer-attached
    // object is mapped (PTR_AND_OBJ with base != begin), even if it is not an
    // array section.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization happens only when deletion is NOT requested.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion happens only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Mark the entry as implicitly generated by the compiler.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10022 
10023 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10024     const OMPDeclareMapperDecl *D) {
10025   auto I = UDMMap.find(D);
10026   if (I != UDMMap.end())
10027     return I->second;
10028   emitUserDefinedMapper(D);
10029   return UDMMap.lookup(D);
10030 }
10031 
// Emit a call passing the trip count of the (possibly nested) teams-distribute
// loop associated with \p D to the runtime via
// __kmpc_push_target_tripcount_mapper. No call is emitted when \p D has no
// such loop directive or when \p SizeEmitter cannot compute the count.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. If \p D is itself a
  // combined teams+distribute directive, it is used directly.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // SizeEmitter may return null if the trip count is unavailable; in that
    // case no runtime call is emitted.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
10059 
// Emit the offloading sequence for a target directive \p D: capture the
// variables used in the region, build the offloading argument arrays, call the
// appropriate __tgt_target* entry point, and fall back to executing the
// outlined host version \p OutlinedFn if offloading fails, is disabled (no
// \p OutlinedFnID), or the 'if' clause evaluates to false.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require the target region to be wrapped in an
  // outer task; captured variables must then be re-captured inside the task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  // ThenGen emits the actual device launch; it runs after TargetThenGen (below)
  // has populated InputInfo/MapTypesArray/MapNamesArray.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // TargetThenGen builds the map information for all captures and map-clause
  // items, materializes the offloading arrays, and then invokes ThenGen
  // (directly, or wrapped in a task when depend/nowait is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, captured-record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the arrays to the enclosing scope so ThenGen can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen directly or inside an outer task.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10369 
// Recursively scan \p S for target-execution directives and emit a device
// function (kernel entry point) for each one found, using \p ParentName for
// the generated name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies the target
    // region entry across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // Every remaining directive kind is not a target-execution directive, so
    // reaching here for one of them means RequiresDeviceCodegen lied — an
    // internal inconsistency.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target directives: scan only their associated statement, if any.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10518 
// Returns true if \p GD must be skipped by the normal code generation path
// for the current (host or device) compilation. As a side effect, when
// compiling for the device, scans function bodies for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  // Device compilation from here on.
  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function for the device if it is not marked as declare
  // target and has not already been emitted as a target declaration.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
10549 
/// Returns true if the variable \p GD was handled (deferred) here and must not
/// be emitted through the normal path; false requests normal emission.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Host compilations always emit variables normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. Variables
  // with 'link' (or 'to' under unified shared memory) are deferred; they are
  // processed later in emitDeferredTargetDecls().
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10583 
10584 llvm::Constant *
10585 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10586                                                 const VarDecl *VD) {
10587   assert(VD->getType().isConstant(CGM.getContext()) &&
10588          "Expected constant variable.");
10589   StringRef VarName;
10590   llvm::Constant *Addr;
10591   llvm::GlobalValue::LinkageTypes Linkage;
10592   QualType Ty = VD->getType();
10593   SmallString<128> Buffer;
10594   {
10595     unsigned DeviceID;
10596     unsigned FileID;
10597     unsigned Line;
10598     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10599                              FileID, Line);
10600     llvm::raw_svector_ostream OS(Buffer);
10601     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10602        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10603     VarName = OS.str();
10604   }
10605   Linkage = llvm::GlobalValue::InternalLinkage;
10606   Addr =
10607       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10608                                   getDefaultFirstprivateAddressSpace());
10609   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10610   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10611   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10612   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10613       VarName, Addr, VarSize,
10614       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10615   return Addr;
10616 }
10617 
10618 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10619                                                    llvm::Constant *Addr) {
10620   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10621       !CGM.getLangOpts().OpenMPIsDevice)
10622     return;
10623   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10624       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10625   if (!Res) {
10626     if (CGM.getLangOpts().OpenMPIsDevice) {
10627       // Register non-target variables being emitted in device code (debug info
10628       // may cause this).
10629       StringRef VarName = CGM.getMangledName(VD);
10630       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10631     }
10632     return;
10633   }
10634   // Register declare target variables.
10635   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10636   StringRef VarName;
10637   CharUnits VarSize;
10638   llvm::GlobalValue::LinkageTypes Linkage;
10639 
10640   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10641       !HasRequiresUnifiedSharedMemory) {
10642     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10643     VarName = CGM.getMangledName(VD);
10644     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10645       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10646       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10647     } else {
10648       VarSize = CharUnits::Zero();
10649     }
10650     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10651     // Temp solution to prevent optimizations of the internal variables.
10652     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10653       std::string RefName = getName({VarName, "ref"});
10654       if (!CGM.GetGlobalValue(RefName)) {
10655         llvm::Constant *AddrRef =
10656             getOrCreateInternalVariable(Addr->getType(), RefName);
10657         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10658         GVAddrRef->setConstant(/*Val=*/true);
10659         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10660         GVAddrRef->setInitializer(Addr);
10661         CGM.addCompilerUsedGlobal(GVAddrRef);
10662       }
10663     }
10664   } else {
10665     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10666             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10667              HasRequiresUnifiedSharedMemory)) &&
10668            "Declare target attribute must link or to with unified memory.");
10669     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10670       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10671     else
10672       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10673 
10674     if (CGM.getLangOpts().OpenMPIsDevice) {
10675       VarName = Addr->getName();
10676       Addr = nullptr;
10677     } else {
10678       VarName = getAddrOfDeclareTargetVar(VD).getName();
10679       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10680     }
10681     VarSize = CGM.getPointerSize();
10682     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10683   }
10684 
10685   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10686       VarName, Addr, VarSize, Flags, Linkage);
10687 }
10688 
10689 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10690   if (isa<FunctionDecl>(GD.getDecl()) ||
10691       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10692     return emitTargetFunctions(GD);
10693 
10694   return emitTargetGlobalVariable(GD);
10695 }
10696 
10697 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10698   for (const VarDecl *VD : DeferredGlobalVariables) {
10699     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10700         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10701     if (!Res)
10702       continue;
10703     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10704         !HasRequiresUnifiedSharedMemory) {
10705       CGM.EmitGlobal(VD);
10706     } else {
10707       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10708               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10709                HasRequiresUnifiedSharedMemory)) &&
10710              "Expected link clause or to clause with unified memory.");
10711       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10712     }
10713   }
10714 }
10715 
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
  // Intentionally a no-op in the generic runtime; NOTE(review): presumably
  // device-specific runtimes override this to fix up lambda captures — see
  // the declaration in CGOpenMPRuntime.h to confirm.
}
10721 
10722 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10723   for (const OMPClause *Clause : D->clauselists()) {
10724     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10725       HasRequiresUnifiedSharedMemory = true;
10726     } else if (const auto *AC =
10727                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10728       switch (AC->getAtomicDefaultMemOrderKind()) {
10729       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10730         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10731         break;
10732       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10733         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10734         break;
10735       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10736         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10737         break;
10738       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10739         break;
10740       }
10741     }
10742   }
10743 }
10744 
/// Returns the atomic ordering recorded by processRequiresDirective() (the
/// member's initial value applies when no such clause was seen).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10748 
10749 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10750                                                        LangAS &AS) {
10751   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10752     return false;
10753   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10754   switch(A->getAllocatorType()) {
10755   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10756   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10757   // Not supported, fallback to the default mem space.
10758   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10759   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10760   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10761   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10762   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10763   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10764   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10765     AS = LangAS::Default;
10766     return true;
10767   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10768     llvm_unreachable("Expected predefined allocator for the variables with the "
10769                      "static storage.");
10770   }
10771   return false;
10772 }
10773 
/// Returns true if a 'requires unified_shared_memory' clause has been
/// processed (see processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10777 
10778 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10779     CodeGenModule &CGM)
10780     : CGM(CGM) {
10781   if (CGM.getLangOpts().OpenMPIsDevice) {
10782     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10783     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10784   }
10785 }
10786 
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  // Restore the flag saved by the constructor (device compilations only).
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10791 
/// Records \p GD as a global target function on the device. Returns true when
/// the function was already emitted (or no marking is needed), false when it
/// still has to be emitted.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Only relevant for device compilations while auto marking is enabled (see
  // DisableAutoDeclareTargetRAII).
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Report "already emitted" only when a real definition (not just a
      // declaration) exists in the module.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record the declaration; insert().second is true on first insertion, so
  // this returns true only if D was seen before.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10811 
/// Creates the host-side constructor-like function that registers the
/// 'requires' flags with the runtime via __tgt_register_requires. Returns
/// nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // Build a void() function named "<prefix>omp_offloading_requires_reg".
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit: __tgt_register_requires(Flags)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10853 
10854 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10855                                     const OMPExecutableDirective &D,
10856                                     SourceLocation Loc,
10857                                     llvm::Function *OutlinedFn,
10858                                     ArrayRef<llvm::Value *> CapturedVars) {
10859   if (!CGF.HaveInsertPoint())
10860     return;
10861 
10862   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10863   CodeGenFunction::RunCleanupsScope Scope(CGF);
10864 
10865   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10866   llvm::Value *Args[] = {
10867       RTLoc,
10868       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10869       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10870   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10871   RealArgs.append(std::begin(Args), std::end(Args));
10872   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10873 
10874   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10875       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10876   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10877 }
10878 
/// Emits the call that communicates the 'num_teams'/'thread_limit' clause
/// values to the runtime; a null expression lowers to 0 (runtime default).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10907 
/// Emits the code for an 'omp target data' region: a
/// __tgt_target_data_begin_mapper call, the body \p CodeGen, and a matching
/// __tgt_target_data_end_mapper call, all guarded by \p IfCond when present.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; otherwise let the runtime pick the default.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Re-derive the array arguments filled in by BeginThenGen (via Info).
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any; otherwise let the runtime pick the default.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally, when an if clause is present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (conditionally, when an if clause is present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11061 
/// Emits a standalone data-mapping directive ('target enter data',
/// 'target exit data' or 'target update') as a single runtime call, selecting
/// the (nowait) mapper entry point that matches the directive kind.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray/MapNamesArray are filled by TargetThenGen below
  // and read by ThenGen, hence the by-reference captures.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise let the runtime pick the default.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are listed explicitly (rather than
    // relying solely on 'default') so that adding a new directive kind
    // produces a -Wswitch warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays and then runs ThenGen (possibly wrapped in a
  // task when depend/nowait clauses are present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause that evaluates to false makes the directive a no-op.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11239 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// Mirrors the parameter classes of the vector-function ABIs:
  ///  - LinearWithVarStride: linear with a stride given by another parameter,
  ///  - Linear: linear with a constant stride,
  ///  - Uniform: same value across all SIMD lanes,
  ///  - Vector: a distinct value per lane (the default).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    ParamKindTy Kind = Vector;
    // For Linear: the stride; for LinearWithVarStride: the position of the
    // parameter that supplies the stride.
    llvm::APSInt StrideOrArg;
    // Alignment from the 'aligned' clause; zero when unspecified.
    llvm::APSInt Alignment;
  };
} // namespace
11250 
11251 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11252                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11253   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11254   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11255   // of that clause. The VLEN value must be power of 2.
11256   // In other case the notion of the function`s "characteristic data type" (CDT)
11257   // is used to compute the vector length.
11258   // CDT is defined in the following order:
11259   //   a) For non-void function, the CDT is the return type.
11260   //   b) If the function has any non-uniform, non-linear parameters, then the
11261   //   CDT is the type of the first such parameter.
11262   //   c) If the CDT determined by a) or b) above is struct, union, or class
11263   //   type which is pass-by-value (except for the type that maps to the
11264   //   built-in complex data type), the characteristic data type is int.
11265   //   d) If none of the above three cases is applicable, the CDT is int.
11266   // The VLEN is then determined based on the CDT and the size of vector
11267   // register of that ISA for which current vector version is generated. The
11268   // VLEN is computed using the formula below:
11269   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11270   // where vector register size specified in section 3.2.1 Registers and the
11271   // Stack Frame of original AMD64 ABI document.
11272   QualType RetType = FD->getReturnType();
11273   if (RetType.isNull())
11274     return 0;
11275   ASTContext &C = FD->getASTContext();
11276   QualType CDT;
11277   if (!RetType.isNull() && !RetType->isVoidType()) {
11278     CDT = RetType;
11279   } else {
11280     unsigned Offset = 0;
11281     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11282       if (ParamAttrs[Offset].Kind == Vector)
11283         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11284       ++Offset;
11285     }
11286     if (CDT.isNull()) {
11287       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11288         if (ParamAttrs[I + Offset].Kind == Vector) {
11289           CDT = FD->getParamDecl(I)->getType();
11290           break;
11291         }
11292       }
11293     }
11294   }
11295   if (CDT.isNull())
11296     CDT = C.IntTy;
11297   CDT = CDT->getCanonicalTypeUnqualified();
11298   if (CDT->isRecordType() || CDT->isUnionType())
11299     CDT = C.IntTy;
11300   return C.getTypeSize(CDT);
11301 }
11302 
11303 static void
11304 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11305                            const llvm::APSInt &VLENVal,
11306                            ArrayRef<ParamAttrTy> ParamAttrs,
11307                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11308   struct ISADataTy {
11309     char ISA;
11310     unsigned VecRegSize;
11311   };
11312   ISADataTy ISAData[] = {
11313       {
11314           'b', 128
11315       }, // SSE
11316       {
11317           'c', 256
11318       }, // AVX
11319       {
11320           'd', 256
11321       }, // AVX2
11322       {
11323           'e', 512
11324       }, // AVX512
11325   };
11326   llvm::SmallVector<char, 2> Masked;
11327   switch (State) {
11328   case OMPDeclareSimdDeclAttr::BS_Undefined:
11329     Masked.push_back('N');
11330     Masked.push_back('M');
11331     break;
11332   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11333     Masked.push_back('N');
11334     break;
11335   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11336     Masked.push_back('M');
11337     break;
11338   }
11339   for (char Mask : Masked) {
11340     for (const ISADataTy &Data : ISAData) {
11341       SmallString<256> Buffer;
11342       llvm::raw_svector_ostream Out(Buffer);
11343       Out << "_ZGV" << Data.ISA << Mask;
11344       if (!VLENVal) {
11345         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11346         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11347         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11348       } else {
11349         Out << VLENVal;
11350       }
11351       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11352         switch (ParamAttr.Kind){
11353         case LinearWithVarStride:
11354           Out << 's' << ParamAttr.StrideOrArg;
11355           break;
11356         case Linear:
11357           Out << 'l';
11358           if (ParamAttr.StrideOrArg != 1)
11359             Out << ParamAttr.StrideOrArg;
11360           break;
11361         case Uniform:
11362           Out << 'u';
11363           break;
11364         case Vector:
11365           Out << 'v';
11366           break;
11367         }
11368         if (!!ParamAttr.Alignment)
11369           Out << 'a' << ParamAttr.Alignment;
11370       }
11371       Out << '_' << Fn->getName();
11372       Fn->addFnAttr(Out.str());
11373     }
11374   }
11375 }
11376 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11382 
11383 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11384 ///
11385 /// TODO: Need to implement the behavior for reference marked with a
11386 /// var or no linear modifiers (1.b in the section). For this, we
11387 /// need to extend ParamKindTy to support the linear modifiers.
11388 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11389   QT = QT.getCanonicalType();
11390 
11391   if (QT->isVoidType())
11392     return false;
11393 
11394   if (Kind == ParamKindTy::Uniform)
11395     return false;
11396 
11397   if (Kind == ParamKindTy::Linear)
11398     return false;
11399 
11400   // TODO: Handle linear references with modifiers
11401 
11402   if (Kind == ParamKindTy::LinearWithVarStride)
11403     return false;
11404 
11405   return true;
11406 }
11407 
11408 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11409 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11410   QT = QT.getCanonicalType();
11411   unsigned Size = C.getTypeSize(QT);
11412 
11413   // Only scalars and complex within 16 bytes wide set PVB to true.
11414   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11415     return false;
11416 
11417   if (QT->isFloatingType())
11418     return true;
11419 
11420   if (QT->isIntegerType())
11421     return true;
11422 
11423   if (QT->isPointerType())
11424     return true;
11425 
11426   // TODO: Add support for complex types (section 3.1.2, item 2).
11427 
11428   return false;
11429 }
11430 
11431 /// Computes the lane size (LS) of a return type or of an input parameter,
11432 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11433 /// TODO: Add support for references, section 3.2.1, item 1.
11434 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11435   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11436     QualType PTy = QT.getCanonicalType()->getPointeeType();
11437     if (getAArch64PBV(PTy, C))
11438       return C.getTypeSize(PTy);
11439   }
11440   if (getAArch64PBV(QT, C))
11441     return C.getTypeSize(QT);
11442 
11443   return C.getTypeSize(C.getUIntPtrType());
11444 }
11445 
11446 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11447 // signature of the scalar function, as defined in 3.2.2 of the
11448 // AAVFABI.
11449 static std::tuple<unsigned, unsigned, bool>
11450 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11451   QualType RetType = FD->getReturnType().getCanonicalType();
11452 
11453   ASTContext &C = FD->getASTContext();
11454 
11455   bool OutputBecomesInput = false;
11456 
11457   llvm::SmallVector<unsigned, 8> Sizes;
11458   if (!RetType->isVoidType()) {
11459     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11460     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11461       OutputBecomesInput = true;
11462   }
11463   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11464     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11465     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11466   }
11467 
11468   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11469   // The LS of a function parameter / return value can only be a power
11470   // of 2, starting from 8 bits, up to 128.
11471   assert(std::all_of(Sizes.begin(), Sizes.end(),
11472                      [](unsigned Size) {
11473                        return Size == 8 || Size == 16 || Size == 32 ||
11474                               Size == 64 || Size == 128;
11475                      }) &&
11476          "Invalid size");
11477 
11478   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11479                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11480                          OutputBecomesInput);
11481 }
11482 
11483 /// Mangle the parameter part of the vector function name according to
11484 /// their OpenMP classification. The mangling function is defined in
11485 /// section 3.5 of the AAVFABI.
11486 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11487   SmallString<256> Buffer;
11488   llvm::raw_svector_ostream Out(Buffer);
11489   for (const auto &ParamAttr : ParamAttrs) {
11490     switch (ParamAttr.Kind) {
11491     case LinearWithVarStride:
11492       Out << "ls" << ParamAttr.StrideOrArg;
11493       break;
11494     case Linear:
11495       Out << 'l';
11496       // Don't print the step value if it is not present or if it is
11497       // equal to 1.
11498       if (ParamAttr.StrideOrArg != 1)
11499         Out << ParamAttr.StrideOrArg;
11500       break;
11501     case Uniform:
11502       Out << 'u';
11503       break;
11504     case Vector:
11505       Out << 'v';
11506       break;
11507     }
11508 
11509     if (!!ParamAttr.Alignment)
11510       Out << 'a' << ParamAttr.Alignment;
11511   }
11512 
11513   return std::string(Out.str());
11514 }
11515 
11516 // Function used to add the attribute. The parameter `VLEN` is
11517 // templated to allow the use of "x" when targeting scalable functions
11518 // for SVE.
11519 template <typename T>
11520 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11521                                  char ISA, StringRef ParSeq,
11522                                  StringRef MangledName, bool OutputBecomesInput,
11523                                  llvm::Function *Fn) {
11524   SmallString<256> Buffer;
11525   llvm::raw_svector_ostream Out(Buffer);
11526   Out << Prefix << ISA << LMask << VLEN;
11527   if (OutputBecomesInput)
11528     Out << "v";
11529   Out << ParSeq << "_" << MangledName;
11530   Fn->addFnAttr(Out.str());
11531 }
11532 
11533 // Helper function to generate the Advanced SIMD names depending on
11534 // the value of the NDS when simdlen is not present.
11535 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11536                                       StringRef Prefix, char ISA,
11537                                       StringRef ParSeq, StringRef MangledName,
11538                                       bool OutputBecomesInput,
11539                                       llvm::Function *Fn) {
11540   switch (NDS) {
11541   case 8:
11542     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11543                          OutputBecomesInput, Fn);
11544     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11545                          OutputBecomesInput, Fn);
11546     break;
11547   case 16:
11548     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11549                          OutputBecomesInput, Fn);
11550     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11551                          OutputBecomesInput, Fn);
11552     break;
11553   case 32:
11554     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11555                          OutputBecomesInput, Fn);
11556     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11557                          OutputBecomesInput, Fn);
11558     break;
11559   case 64:
11560   case 128:
11561     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11562                          OutputBecomesInput, Fn);
11563     break;
11564   default:
11565     llvm_unreachable("Scalar type is too wide.");
11566   }
11567 }
11568 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// Validates the user-provided `simdlen` value (\p UserVLEN, 0 when absent)
/// against the target's constraints, then attaches the mangled vector-variant
/// names to \p Fn for either SVE (\p ISA == 's') or Advanced SIMD
/// (\p ISA == 'n'). Invalid `simdlen` values produce a warning and emit no
/// attributes.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vectors use "x" as the VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11677 
11678 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11679                                               llvm::Function *Fn) {
11680   ASTContext &C = CGM.getContext();
11681   FD = FD->getMostRecentDecl();
11682   // Map params to their positions in function decl.
11683   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11684   if (isa<CXXMethodDecl>(FD))
11685     ParamPositions.try_emplace(FD, 0);
11686   unsigned ParamPos = ParamPositions.size();
11687   for (const ParmVarDecl *P : FD->parameters()) {
11688     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11689     ++ParamPos;
11690   }
11691   while (FD) {
11692     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11693       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11694       // Mark uniform parameters.
11695       for (const Expr *E : Attr->uniforms()) {
11696         E = E->IgnoreParenImpCasts();
11697         unsigned Pos;
11698         if (isa<CXXThisExpr>(E)) {
11699           Pos = ParamPositions[FD];
11700         } else {
11701           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11702                                 ->getCanonicalDecl();
11703           Pos = ParamPositions[PVD];
11704         }
11705         ParamAttrs[Pos].Kind = Uniform;
11706       }
11707       // Get alignment info.
11708       auto NI = Attr->alignments_begin();
11709       for (const Expr *E : Attr->aligneds()) {
11710         E = E->IgnoreParenImpCasts();
11711         unsigned Pos;
11712         QualType ParmTy;
11713         if (isa<CXXThisExpr>(E)) {
11714           Pos = ParamPositions[FD];
11715           ParmTy = E->getType();
11716         } else {
11717           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11718                                 ->getCanonicalDecl();
11719           Pos = ParamPositions[PVD];
11720           ParmTy = PVD->getType();
11721         }
11722         ParamAttrs[Pos].Alignment =
11723             (*NI)
11724                 ? (*NI)->EvaluateKnownConstInt(C)
11725                 : llvm::APSInt::getUnsigned(
11726                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11727                           .getQuantity());
11728         ++NI;
11729       }
11730       // Mark linear parameters.
11731       auto SI = Attr->steps_begin();
11732       auto MI = Attr->modifiers_begin();
11733       for (const Expr *E : Attr->linears()) {
11734         E = E->IgnoreParenImpCasts();
11735         unsigned Pos;
11736         // Rescaling factor needed to compute the linear parameter
11737         // value in the mangled name.
11738         unsigned PtrRescalingFactor = 1;
11739         if (isa<CXXThisExpr>(E)) {
11740           Pos = ParamPositions[FD];
11741         } else {
11742           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11743                                 ->getCanonicalDecl();
11744           Pos = ParamPositions[PVD];
11745           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11746             PtrRescalingFactor = CGM.getContext()
11747                                      .getTypeSizeInChars(P->getPointeeType())
11748                                      .getQuantity();
11749         }
11750         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11751         ParamAttr.Kind = Linear;
11752         // Assuming a stride of 1, for `linear` without modifiers.
11753         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11754         if (*SI) {
11755           Expr::EvalResult Result;
11756           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11757             if (const auto *DRE =
11758                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11759               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11760                 ParamAttr.Kind = LinearWithVarStride;
11761                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11762                     ParamPositions[StridePVD->getCanonicalDecl()]);
11763               }
11764             }
11765           } else {
11766             ParamAttr.StrideOrArg = Result.Val.getInt();
11767           }
11768         }
11769         // If we are using a linear clause on a pointer, we need to
11770         // rescale the value of linear_step with the byte size of the
11771         // pointee type.
11772         if (Linear == ParamAttr.Kind)
11773           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11774         ++SI;
11775         ++MI;
11776       }
11777       llvm::APSInt VLENVal;
11778       SourceLocation ExprLoc;
11779       const Expr *VLENExpr = Attr->getSimdlen();
11780       if (VLENExpr) {
11781         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11782         ExprLoc = VLENExpr->getExprLoc();
11783       }
11784       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11785       if (CGM.getTriple().isX86()) {
11786         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11787       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11788         unsigned VLEN = VLENVal.getExtValue();
11789         StringRef MangledName = Fn->getName();
11790         if (CGM.getTarget().hasFeature("sve"))
11791           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11792                                          MangledName, 's', 128, Fn, ExprLoc);
11793         if (CGM.getTarget().hasFeature("neon"))
11794           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11795                                          MangledName, 'n', 128, Fn, ExprLoc);
11796       }
11797     }
11798     FD = FD->getPreviousDecl();
11799   }
11800 }
11801 
11802 namespace {
11803 /// Cleanup action for doacross support.
11804 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11805 public:
11806   static const int DoacrossFinArgs = 2;
11807 
11808 private:
11809   llvm::FunctionCallee RTLFn;
11810   llvm::Value *Args[DoacrossFinArgs];
11811 
11812 public:
11813   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11814                     ArrayRef<llvm::Value *> CallArgs)
11815       : RTLFn(RTLFn) {
11816     assert(CallArgs.size() == DoacrossFinArgs);
11817     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11818   }
11819   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11820     if (!CGF.HaveInsertPoint())
11821       return;
11822     CGF.EmitRuntimeCall(RTLFn, Args);
11823   }
11824 };
11825 } // namespace
11826 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  // Emits the __kmpc_doacross_init call for an 'ordered(n)' loop nest and
  // registers a cleanup that emits the matching __kmpc_doacross_fini.
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the runtime's per-dimension record.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop in the doacross nest.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the array: lower bounds default to 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini at region exit (normal and EH).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11897 
11898 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11899                                           const OMPDependClause *C) {
11900   QualType Int64Ty =
11901       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11902   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11903   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11904       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11905   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11906   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11907     const Expr *CounterVal = C->getLoopData(I);
11908     assert(CounterVal);
11909     llvm::Value *CntVal = CGF.EmitScalarConversion(
11910         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11911         CounterVal->getExprLoc());
11912     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11913                           /*Volatile=*/false, Int64Ty);
11914   }
11915   llvm::Value *Args[] = {
11916       emitUpdateLocation(CGF, C->getBeginLoc()),
11917       getThreadID(CGF, C->getBeginLoc()),
11918       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11919   llvm::FunctionCallee RTLFn;
11920   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11921     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11922                                                   OMPRTL___kmpc_doacross_post);
11923   } else {
11924     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11925     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11926                                                   OMPRTL___kmpc_doacross_wait);
11927   }
11928   CGF.EmitRuntimeCall(RTLFn, Args);
11929 }
11930 
11931 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11932                                llvm::FunctionCallee Callee,
11933                                ArrayRef<llvm::Value *> Args) const {
11934   assert(Loc.isValid() && "Outlined function call location must be valid.");
11935   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11936 
11937   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11938     if (Fn->doesNotThrow()) {
11939       CGF.EmitNounwindRuntimeCall(Fn, Args);
11940       return;
11941     }
11942   }
11943   CGF.EmitRuntimeCall(Callee, Args);
11944 }
11945 
// Default implementation: forwards to emitCall, which attaches an artificial
// debug location and picks a nounwind call when the callee permits it.
// Device runtimes override this to adjust arguments for outlined functions.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11951 
11952 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11953   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11954     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11955       HasEmittedDeclareTargetRegion = true;
11956 }
11957 
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Default (host) implementation: the native parameter's own local slot is
  // used directly; TargetParam is unused here.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11963 
/// Returns the address to use for local variable \p VD in the current
/// context, or an invalid Address when the default (stack) allocation should
/// be used.
///
/// Two features are handled here:
///  * Untied tasks: locals of untied tasks live in task-private storage whose
///    addresses were recorded by UntiedTaskLocalDeclsRAII; those saved
///    addresses are returned instead of fresh allocas.
///  * '#pragma omp allocate': variables carrying OMPAllocateDeclAttr are
///    allocated through the __kmpc_alloc runtime entry point and released
///    with __kmpc_free via a pushed EH cleanup.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Addresses previously recorded for this variable if it is local to an
  // untied task (see UntiedTaskLocalDeclsRAII).
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size in bytes, rounded up to the declared
    // alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified types: the size is only known at run time.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(tid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied-task locals, publish the runtime-allocated pointer in the
    // task-private slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn; // __kmpc_free callee
      unsigned LocEncoding;       // raw-encoded location of the variable
      Address Addr;               // address to release
      const Expr *Allocator;      // allocator expression from the attribute

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits: RTLFn(tid, addr, allocator) - i.e. __kmpc_free for the
      // storage obtained above.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12066 
12067 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12068                                              const VarDecl *VD) const {
12069   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12070   if (It == FunctionToUntiedTaskStackMap.end())
12071     return false;
12072   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12073 }
12074 
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Only loops with nontemporal clauses push a set; the destructor pops it.
  if (!NeedToPush)
    return;
  // Collect every declaration referenced by a nontemporal clause into a new
  // set on top of the stack.
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class
        // ('this->x'); record the member declaration itself.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
12100 
12101 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12102   if (!NeedToPush)
12103     return;
12104   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12105 }
12106 
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
                         std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  // Nothing to record when the task has no untied local variables.
  if (!NeedToPush)
    return;
  // Remember which stack slot belongs to the current function so that
  // getAddressOfLocalVariable() can look up the saved addresses later, then
  // push the variable -> (address, real address) map itself.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}
12118 
12119 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12120   if (!NeedToPush)
12121     return;
12122   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12123 }
12124 
12125 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12126   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12127 
12128   return llvm::any_of(
12129       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12130       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12131 }
12132 
12133 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12134     const OMPExecutableDirective &S,
12135     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12136     const {
12137   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12138   // Vars in target/task regions must be excluded completely.
12139   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12140       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12141     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12142     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12143     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12144     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12145       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12146         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12147     }
12148   }
12149   // Exclude vars in private clauses.
12150   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12151     for (const Expr *Ref : C->varlists()) {
12152       if (!Ref->getType()->isScalarType())
12153         continue;
12154       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12155       if (!DRE)
12156         continue;
12157       NeedToCheckForLPCs.insert(DRE->getDecl());
12158     }
12159   }
12160   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12161     for (const Expr *Ref : C->varlists()) {
12162       if (!Ref->getType()->isScalarType())
12163         continue;
12164       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12165       if (!DRE)
12166         continue;
12167       NeedToCheckForLPCs.insert(DRE->getDecl());
12168     }
12169   }
12170   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12171     for (const Expr *Ref : C->varlists()) {
12172       if (!Ref->getType()->isScalarType())
12173         continue;
12174       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12175       if (!DRE)
12176         continue;
12177       NeedToCheckForLPCs.insert(DRE->getDecl());
12178     }
12179   }
12180   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12181     for (const Expr *Ref : C->varlists()) {
12182       if (!Ref->getType()->isScalarType())
12183         continue;
12184       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12185       if (!DRE)
12186         continue;
12187       NeedToCheckForLPCs.insert(DRE->getDecl());
12188     }
12189   }
12190   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12191     for (const Expr *Ref : C->varlists()) {
12192       if (!Ref->getType()->isScalarType())
12193         continue;
12194       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12195       if (!DRE)
12196         continue;
12197       NeedToCheckForLPCs.insert(DRE->getDecl());
12198     }
12199   }
12200   for (const Decl *VD : NeedToCheckForLPCs) {
12201     for (const LastprivateConditionalData &Data :
12202          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12203       if (Data.DeclToUniqueName.count(VD) > 0) {
12204         if (!Data.Disabled)
12205           NeedToAddForLPCsAsDisabled.insert(VD);
12206         break;
12207       }
12208     }
12209   }
12210 }
12211 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only for OpenMP >= 5.0 directives that actually carry a
      // lastprivate(conditional: ...) clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Record each conditional-lastprivate variable together with a unique name
  // that later keys the internal "last value" globals.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // The loop iteration variable and the owning function are needed later
  // when emitting the conditional updates.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12243 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Determine which variables must have their lastprivate-conditional
  // analysis suppressed inside this region; if any, push them as a
  // "disabled" entry (empty unique names) so inner lookups bail out.
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12262 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory for the "disable analysis in inner regions" constructor;
  // the returned RAII pops the disabled entry (if pushed) on destruction.
  return LastprivateConditionalRAII(CGF, S);
}
12268 
12269 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12270   if (CGM.getLangOpts().OpenMP < 50)
12271     return;
12272   if (Action == ActionToDo::DisableLastprivateConditional) {
12273     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12274            "Expected list of disabled private vars.");
12275     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12276   }
12277   if (Action == ActionToDo::PushAsLastprivateConditional) {
12278     assert(
12279         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12280         "Expected list of lastprivate conditional vars.");
12281     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12282   }
12283 }
12284 
/// Builds (or reuses) a per-function record wrapping the private copy of a
/// lastprivate(conditional:) variable together with a 'Fired' flag byte:
///   struct { <VD type> Value; char Fired; };
/// The flag is zero-initialized here; inner regions later set it to signal
/// that the variable was written. Returns the address of the Value field,
/// which serves as the private copy of \p VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the wrapper record and a
    // stack temporary for it.
    // NOTE(review): "lasprivate.conditional" looks like a typo for
    // "lastprivate.conditional", but it is only an internal record name -
    // renaming it would churn IR-level expectations; left as-is.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached from a previous call in the same function.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12319 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
///
/// Walks an expression tree looking for a DeclRefExpr or this-member
/// MemberExpr naming a variable registered in one of the pushed
/// LastprivateConditionalData entries. Visiting succeeds as soon as such a
/// reference is found; the collected details are read via getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Details of the matched reference, set by the Visit* methods below.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search region data innermost-to-outermost for the referenced decl.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      // Analysis is explicitly disabled for this decl - report no match.
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members accessed through 'this' are tracked.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children; non-glvalue expressions cannot be the
    // written-to reference we are looking for, so skip them.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, canonical decl, unique name, IV lvalue, owning function)
  /// for the reference found by the last successful Visit().
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12390 
/// Emits the (critical-section protected) update of the internal globals that
/// track the "last" value of a lastprivate conditional variable:
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// \p IVLVal is the loop iteration variable, \p UniqueDeclName keys the
/// internal globals, and \p LVal is the private copy of the variable.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. The comparison must match the IV's signedness.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Serialize the update across threads with a named critical region.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12477 
/// Called for a store-like LHS expression: if it references a registered
/// lastprivate(conditional:) variable, emits the bookkeeping for the
/// candidate "last" value (or, for inner parallel regions, sets the Fired
/// flag of the outer region's wrapper struct).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy as the wrapper struct built by
    // emitLastprivateConditionalInit and set its Fired flag (emitted as a
    // volatile atomic store).
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12520 
12521 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12522     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12523     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12524   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12525     return;
12526   auto Range = llvm::reverse(LastprivateConditionalStack);
12527   auto It = llvm::find_if(
12528       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12529   if (It == Range.end() || It->Fn != CGF.CurFn)
12530     return;
12531   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12532   assert(LPCI != LastprivateConditionalToTypes.end() &&
12533          "Lastprivates must be registered already.");
12534   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12535   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12536   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12537   for (const auto &Pair : It->DeclToUniqueName) {
12538     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12539     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12540       continue;
12541     auto I = LPCI->getSecond().find(Pair.first);
12542     assert(I != LPCI->getSecond().end() &&
12543            "Lastprivate must be rehistered already.");
12544     // bool Cmp = priv_a.Fired != 0;
12545     LValue BaseLVal = std::get<3>(I->getSecond());
12546     LValue FiredLVal =
12547         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12548     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12549     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12550     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12551     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12552     // if (Cmp) {
12553     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12554     CGF.EmitBlock(ThenBB);
12555     Address Addr = CGF.GetAddrOfLocalVar(VD);
12556     LValue LVal;
12557     if (VD->getType()->isReferenceType())
12558       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12559                                            AlignmentSource::Decl);
12560     else
12561       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12562                                 AlignmentSource::Decl);
12563     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12564                                      D.getBeginLoc());
12565     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12566     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12567     // }
12568   }
12569 }
12570 
/// At region end, copies the tracked "last" value (stored in the internal
/// global named by \p VD's unique name) back into the original variable
/// \p PrivLVal, provided any update was ever emitted.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  // priv_a = last_a;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12589 
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outlining for 'parallel' needs the full OpenMP runtime, which is
  // unavailable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12595 
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outlining for 'teams' needs the full OpenMP runtime, which is
  // unavailable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12601 
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Outlining for 'task' needs the full OpenMP runtime, which is
  // unavailable in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12609 
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  // 'parallel' calls need the full OpenMP runtime, which is unavailable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12617 
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  // 'critical' regions need the full OpenMP runtime, which is unavailable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12624 
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  // 'master' regions need the full OpenMP runtime, which is unavailable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12630 
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  // 'masked' regions need the full OpenMP runtime, which is unavailable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12637 
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  // 'taskyield' needs the full OpenMP runtime, which is unavailable in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12642 
// 'taskgroup' regions are not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12648 
// 'single' regions (including copyprivate handling) are not supported in
// SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12656 
// 'ordered' regions are not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12663 
// Explicit/implicit barriers are not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12671 
// Dynamic (dispatched) worksharing-loop initialization is not supported in
// SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12678 
// Static worksharing-loop initialization is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12684 
// 'distribute' static initialization is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12690 
// End-of-ordered-iteration signaling is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12697 
// Static worksharing-loop finalization is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12703 
// Querying the next dynamically-scheduled loop chunk is not supported in
// SIMD-only mode; unreachable stub.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12711 
// The 'num_threads' clause is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12717 
// The 'proc_bind' clause is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12723 
// 'threadprivate' variable addressing is not supported in SIMD-only mode;
// unreachable stub.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12730 
// Emission of 'threadprivate' variable definitions is not supported in
// SIMD-only mode; unreachable stub.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12736 
// Artificial threadprivate storage is not supported in SIMD-only mode;
// unreachable stub.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12741 
// The 'flush' directive is not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12748 
// 'task' directive codegen is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12757 
// 'taskloop' directive codegen is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12764 
// Reduction codegen in SIMD-only mode: only "simple" reductions
// (Options.SimpleReduction, asserted below) may reach this point; they are
// handled entirely by the base-class implementation, which needs no
// runtime-library calls for that case.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Delegate to the shared (non-SIMD-specific) simple-reduction path.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12773 
// Task reduction initialization is not supported in SIMD-only mode;
// unreachable stub.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12779 
// Task reduction finalization is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12785 
// Task reduction fixups are not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12792 
// Looking up a task reduction item is not supported in SIMD-only mode;
// unreachable stub.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12799 
// 'taskwait' is not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12804 
// 'cancellation point' is not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12810 
// 'cancel' is not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12816 
// Outlining 'target' regions is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12823 
// Launching a 'target' region is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12833 
// Registration of target functions is not supported in SIMD-only mode;
// unreachable stub.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12837 
// Target-specific handling of global variables is not supported in SIMD-only
// mode; unreachable stub.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12841 
// SIMD-only mode performs no target/offload-specific processing for globals.
// Returning false reports the global as not handled here — presumably so the
// caller emits it through the normal host path; confirm against the
// CGOpenMPRuntime::emitTargetGlobal contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12845 
// 'teams' region launch is not supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12853 
// The 'num_teams'/'thread_limit' clauses are not supported in SIMD-only
// mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12860 
// 'target data' region codegen is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12866 
// Stand-alone target data directives (e.g. enter/exit data, update) are not
// supported in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12872 
// Doacross loop initialization is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12878 
// Doacross 'ordered' dependence codegen is not supported in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12883 
// Parameter translation (for target-device argument passing) is not
// supported in SIMD-only mode; unreachable stub.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12889 
// Mapping a translated parameter back to an address is not supported in
// SIMD-only mode; unreachable stub.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12896