1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Keeps the region kind, the code generation callback, the directive kind
/// and the cancellation flag that are shared by every derived region info.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info for the captured statement \p CS.
  /// \param CS Captured statement forwarded to the base class.
  /// \param RegionKind Kind of the OpenMP region.
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind Directive kind reported by getDirectiveKind().
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info that is not bound to a captured statement; the
  /// base class only receives the CR_OpenMP kind.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting the switching point of an untied task. The default
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Get the cancellation flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: matches every captured statement info whose
  /// kind is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region, used by classof() of the derived classes.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind associated with the region.
  OpenMPDirectiveKind Kind;
  /// Cancellation flag associated with the region.
  bool HasCancel;
};
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder code generation callback passed by CGOpenMPInnerExprInfo to
/// its base class; it must never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412   bool NoInheritance = false;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel,
421                           bool NoInheritance = true)
422       : CGF(CGF), NoInheritance(NoInheritance) {
423     // Start emission for the construct.
424     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
425         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
426     if (NoInheritance) {
427       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
429       CGF.LambdaThisCaptureField = nullptr;
430       BlockInfo = CGF.BlockInfo;
431       CGF.BlockInfo = nullptr;
432     }
433   }
434 
435   ~InlinedOpenMPRegionRAII() {
436     // Restore original CapturedStmtInfo only if we're done with code emission.
437     auto *OldCSI =
438         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
439     delete CGF.CapturedStmtInfo;
440     CGF.CapturedStmtInfo = OldCSI;
441     if (NoInheritance) {
442       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
443       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
444       CGF.BlockInfo = BlockInfo;
445     }
446   }
447 };
448 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Has the same value as
  /// OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Marks this enum as a bitmask enum; OMP_IDENT_WORK_DISTRIBUTE is declared
  // as the largest individual value.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
477 
namespace {
// NOTE(review): presumably makes the bitmask operators for the enums marked
// with LLVM_MARK_AS_BITMASK_ENUM visible in this namespace - confirm against
// llvm/ADT/BitmaskEnum.h.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Marks this enum as a bitmask enum; OMP_REQ_DYNAMIC_ALLOCATORS is declared
  // as the largest individual value.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
503 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are declared in the same order as the fields of the
/// structure quoted above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
544 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
576 
577 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
578 /// region.
579 class CleanupTy final : public EHScopeStack::Cleanup {
580   PrePostActionTy *Action;
581 
582 public:
583   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
584   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
585     if (!CGF.HaveInsertPoint())
586       return;
587     Action->Exit(CGF);
588   }
589 };
590 
591 } // anonymous namespace
592 
593 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
594   CodeGenFunction::RunCleanupsScope Scope(CGF);
595   if (PrePostAction) {
596     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
597     Callback(CodeGen, CGF, *PrePostAction);
598   } else {
599     PrePostActionTy Action;
600     Callback(CodeGen, CGF, Action);
601   }
602 }
603 
604 /// Check if the combiner is a call to UDR combiner and if it is so return the
605 /// UDR decl used for reduction.
606 static const OMPDeclareReductionDecl *
607 getReductionInit(const Expr *ReductionOp) {
608   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
609     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
610       if (const auto *DRE =
611               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
612         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
613           return DRD;
614   return nullptr;
615 }
616 
617 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
618                                              const OMPDeclareReductionDecl *DRD,
619                                              const Expr *InitOp,
620                                              Address Private, Address Original,
621                                              QualType Ty) {
622   if (DRD->getInitializer()) {
623     std::pair<llvm::Function *, llvm::Function *> Reduction =
624         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
625     const auto *CE = cast<CallExpr>(InitOp);
626     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
627     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
628     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
629     const auto *LHSDRE =
630         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
631     const auto *RHSDRE =
632         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
633     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
634     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
635                             [=]() { return Private; });
636     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
637                             [=]() { return Original; });
638     (void)PrivateScope.Privatize();
639     RValue Func = RValue::get(Reduction.second);
640     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
641     CGF.EmitIgnoredExpr(InitOp);
642   } else {
643     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
644     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
645     auto *GV = new llvm::GlobalVariable(
646         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
647         llvm::GlobalValue::PrivateLinkage, Init, Name);
648     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
649     RValue InitRVal;
650     switch (CGF.getEvaluationKind(Ty)) {
651     case TEK_Scalar:
652       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
653       break;
654     case TEK_Complex:
655       InitRVal =
656           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
657       break;
658     case TEK_Aggregate: {
659       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
660       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
661       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
662                            /*IsInitializer=*/false);
663       return;
664     }
665     }
666     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
667     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
668     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
669                          /*IsInitializer=*/false);
670   }
671 }
672 
/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized with
/// emitInitWithReductionInitializer; otherwise \p Init is evaluated directly
/// into every element.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' declaration; when non-null, the source
/// array is traversed in lockstep with the destination array.
/// \param SrcAddr Address of the original array. Only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source pointer is tracked only when a reduction declaration is given.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers; their
  // first incoming value is the array start coming from the entry block.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // pointers are advanced.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source GEP reuses the value name
    // "omp.arraycpy.dest.element"; presumably a copy-paste leftover. Confirm
    // that no test matches on the name before changing it.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block is current after the element
  // initialization was emitted.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
761 
762 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
763   return CGF.EmitOMPSharedLValue(E);
764 }
765 
766 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
767                                             const Expr *E) {
768   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
769     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
770   return LValue();
771 }
772 
773 void ReductionCodeGen::emitAggregateInitialization(
774     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
775     const OMPDeclareReductionDecl *DRD) {
776   // Emit VarDecl with copy init for arrays.
777   // Get the address of the original variable captured in current
778   // captured region.
779   const auto *PrivateVD =
780       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
781   bool EmitDeclareReductionInit =
782       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
783   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
784                        EmitDeclareReductionInit,
785                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
786                                                 : PrivateVD->getInit(),
787                        DRD, SharedLVal.getAddress(CGF));
788 }
789 
790 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
791                                    ArrayRef<const Expr *> Origs,
792                                    ArrayRef<const Expr *> Privates,
793                                    ArrayRef<const Expr *> ReductionOps) {
794   ClausesData.reserve(Shareds.size());
795   SharedAddresses.reserve(Shareds.size());
796   Sizes.reserve(Shareds.size());
797   BaseDecls.reserve(Shareds.size());
798   const auto *IOrig = Origs.begin();
799   const auto *IPriv = Privates.begin();
800   const auto *IRed = ReductionOps.begin();
801   for (const Expr *Ref : Shareds) {
802     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
803     std::advance(IOrig, 1);
804     std::advance(IPriv, 1);
805     std::advance(IRed, 1);
806   }
807 }
808 
809 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
810   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
811          "Number of generated lvalues must be exactly N.");
812   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
813   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
814   SharedAddresses.emplace_back(First, Second);
815   if (ClausesData[N].Shared == ClausesData[N].Ref) {
816     OrigAddresses.emplace_back(First, Second);
817   } else {
818     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
819     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
820     OrigAddresses.emplace_back(First, Second);
821   }
822 }
823 
824 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
825   const auto *PrivateVD =
826       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
827   QualType PrivateType = PrivateVD->getType();
828   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
829   if (!PrivateType->isVariablyModifiedType()) {
830     Sizes.emplace_back(
831         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832         nullptr);
833     return;
834   }
835   llvm::Value *Size;
836   llvm::Value *SizeInChars;
837   auto *ElemType =
838       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
839           ->getElementType();
840   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
841   if (AsArraySection) {
842     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
843                                      OrigAddresses[N].first.getPointer(CGF));
844     Size = CGF.Builder.CreateNUWAdd(
845         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
846     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
847   } else {
848     SizeInChars =
849         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
850     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
851   }
852   Sizes.emplace_back(SizeInChars, Size);
853   CodeGenFunction::OpaqueValueMapping OpaqueMap(
854       CGF,
855       cast<OpaqueValueExpr>(
856           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
857       RValue::get(Size));
858   CGF.EmitVariablyModifiedType(PrivateType);
859 }
860 
861 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
862                                          llvm::Value *Size) {
863   const auto *PrivateVD =
864       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
865   QualType PrivateType = PrivateVD->getType();
866   if (!PrivateType->isVariablyModifiedType()) {
867     assert(!Size && !Sizes[N].second &&
868            "Size should be nullptr for non-variably modified reduction "
869            "items.");
870     return;
871   }
872   CodeGenFunction::OpaqueValueMapping OpaqueMap(
873       CGF,
874       cast<OpaqueValueExpr>(
875           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
876       RValue::get(Size));
877   CGF.EmitVariablyModifiedType(PrivateType);
878 }
879 
/// Emit the initialization of the private copy of the N-th reduction item.
///
/// \param PrivateAddr  Address of the private copy; it is re-cast to the
///                     memory type of the private variable.
/// \param SharedLVal   LValue of the shared item; it is rebuilt with the type,
///                     base info and TBAA info of the recorded shared lvalue.
/// \param DefaultInit  Callback performing a default initialization; its
///                     boolean result is only consulted in the last branch
///                     below.
///
/// Requires that the shared lvalue for item \p N has already been recorded.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Null unless getReductionInit finds a declare-reduction declaration for
  // this item's reduction operation.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array: initialize element by element. DefaultInit is invoked first only
    // when the declare-reduction has an initializer; its result is ignored.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item initialized through the declare-reduction path.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit reported false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
913 
914 bool ReductionCodeGen::needCleanups(unsigned N) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   return DTorKind != QualType::DK_none;
920 }
921 
922 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
923                                     Address PrivateAddr) {
924   const auto *PrivateVD =
925       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
926   QualType PrivateType = PrivateVD->getType();
927   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
928   if (needCleanups(N)) {
929     PrivateAddr = CGF.Builder.CreateElementBitCast(
930         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
931     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
932   }
933 }
934 
935 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
936                           LValue BaseLV) {
937   BaseTy = BaseTy.getNonReferenceType();
938   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
939          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
940     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
941       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
942     } else {
943       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
944       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
945     }
946     BaseTy = BaseTy->getPointeeType();
947   }
948   return CGF.MakeAddrLValue(
949       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
950                                        CGF.ConvertTypeForMem(ElTy)),
951       BaseLV.getType(), BaseLV.getBaseInfo(),
952       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
953 }
954 
/// Wrap the raw pointer \p Addr so that it can stand in for a base of type
/// \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy (until the type is neither,
/// or is the same type as \p ElTy) a memory temporary is created, and each
/// temporary's address is stored into the one created before it. \p Addr is
/// cast to the element type of the last temporary and stored there; the first
/// temporary is returned. If no temporary was needed, \p Addr is cast to
/// \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: most recently created temporary; TopTmp: the one created before it;
  // MostTopTmp: the very first (outermost) temporary.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new temporary.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Choose the type Addr must have: what the innermost temporary holds, or
  // the caller-provided type when there is no indirection.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
982 
983 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
984   const VarDecl *OrigVD = nullptr;
985   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
986     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
987     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
988       Base = TempOASE->getBase()->IgnoreParenImpCasts();
989     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
990       Base = TempASE->getBase()->IgnoreParenImpCasts();
991     DE = cast<DeclRefExpr>(Base);
992     OrigVD = cast<VarDecl>(DE->getDecl());
993   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
994     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
995     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
996       Base = TempASE->getBase()->IgnoreParenImpCasts();
997     DE = cast<DeclRefExpr>(Base);
998     OrigVD = cast<VarDecl>(DE->getDecl());
999   }
1000   return OrigVD;
1001 }
1002 
1003 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1004                                                Address PrivateAddr) {
1005   const DeclRefExpr *DE;
1006   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1007     BaseDecls.emplace_back(OrigVD);
1008     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1009     LValue BaseLValue =
1010         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1011                     OriginalBaseLValue);
1012     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1013         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1014     llvm::Value *PrivatePointer =
1015         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1016             PrivateAddr.getPointer(),
1017             SharedAddresses[N].first.getAddress(CGF).getType());
1018     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1019     return castToBase(CGF, OrigVD->getType(),
1020                       SharedAddresses[N].first.getType(),
1021                       OriginalBaseLValue.getAddress(CGF).getType(),
1022                       OriginalBaseLValue.getAlignment(), Ptr);
1023   }
1024   BaseDecls.emplace_back(
1025       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1026   return PrivateAddr;
1027 }
1028 
1029 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1030   const OMPDeclareReductionDecl *DRD =
1031       getReductionInit(ClausesData[N].ReductionOp);
1032   return DRD && DRD->getInitializer();
1033 }
1034 
1035 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1036   return CGF.EmitLoadOfPointerLValue(
1037       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1038       getThreadIDVariable()->getType()->castAs<PointerType>());
1039 }
1040 
1041 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1042   if (!CGF.HaveInsertPoint())
1043     return;
1044   // 1.2.2 OpenMP Language Terminology
1045   // Structured block - An executable statement with a single entry at the
1046   // top and a single exit at the bottom.
1047   // The point of exit cannot be a branch out of the structured block.
1048   // longjmp() and throw() must not violate the entry/exit criteria.
1049   CGF.EHStack.pushTerminate();
1050   if (S)
1051     CGF.incrementProfileCounter(S);
1052   CodeGen(CGF);
1053   CGF.EHStack.popTerminate();
1054 }
1055 
1056 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1057     CodeGenFunction &CGF) {
1058   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1059                             getThreadIDVariable()->getType(),
1060                             AlignmentSource::Decl);
1061 }
1062 
1063 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1064                                        QualType FieldTy) {
1065   auto *Field = FieldDecl::Create(
1066       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1067       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1068       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1069   Field->setAccess(AS_public);
1070   DC->addDecl(Field);
1071   return Field;
1072 }
1073 
1074 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1075                                  StringRef Separator)
1076     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1077       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1078   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1079 
1080   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1081   OMPBuilder.initialize();
1082   loadOffloadInfoMetadata();
1083 }
1084 
1085 void CGOpenMPRuntime::clear() {
1086   InternalVars.clear();
1087   // Clean non-target variable declarations possibly used only in debug info.
1088   for (const auto &Data : EmittedNonTargetVariables) {
1089     if (!Data.getValue().pointsToAliveValue())
1090       continue;
1091     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1092     if (!GV)
1093       continue;
1094     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1095       continue;
1096     GV->eraseFromParent();
1097   }
1098 }
1099 
1100 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1101   SmallString<128> Buffer;
1102   llvm::raw_svector_ostream OS(Buffer);
1103   StringRef Sep = FirstSeparator;
1104   for (StringRef Part : Parts) {
1105     OS << Sep << Part;
1106     Sep = Separator;
1107   }
1108   return std::string(OS.str());
1109 }
1110 
/// Emit an internal helper function for a user-defined reduction: either the
/// combiner (\p IsCombiner is true) or the initializer.
///
/// \param Ty                   Reduction type; both parameters of the emitted
///                             function are restrict-qualified pointers to it.
/// \param CombinerInitializer  Expression emitted as the function body (its
///                             value is ignored); may be null.
/// \param In                   Variable mapped to the pointee of the second
///                             parameter.
/// \param Out                  Variable mapped to the pointee of the first
///                             parameter.
/// \returns the newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Emitted signature: void <name>(Ty *restrict out, Ty *restrict in);
  // note that the 'out' parameter comes first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The name is built from "omp_combiner" or "omp_initializer" followed by an
  // empty part, so it ends with a separator.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first emit the non-trivial initializer of the 'Out'
  // variable (if any) into its mapped storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1167 
1168 void CGOpenMPRuntime::emitUserDefinedReduction(
1169     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1170   if (UDRMap.count(D) > 0)
1171     return;
1172   llvm::Function *Combiner = emitCombinerOrInitializer(
1173       CGM, D->getType(), D->getCombiner(),
1174       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1175       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1176       /*IsCombiner=*/true);
1177   llvm::Function *Initializer = nullptr;
1178   if (const Expr *Init = D->getInitializer()) {
1179     Initializer = emitCombinerOrInitializer(
1180         CGM, D->getType(),
1181         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1182                                                                      : nullptr,
1183         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1184         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1185         /*IsCombiner=*/false);
1186   }
1187   UDRMap.try_emplace(D, Combiner, Initializer);
1188   if (CGF) {
1189     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1190     Decls.second.push_back(D);
1191   }
1192 }
1193 
1194 std::pair<llvm::Function *, llvm::Function *>
1195 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1196   auto I = UDRMap.find(D);
1197   if (I != UDRMap.end())
1198     return I->second;
1199   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1200   return UDRMap.lookup(D);
1201 }
1202 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback for an OMPD_parallel region
// onto the builder; the destructor pops it again. Both are no-ops when the
// builder pointer is null.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // CGF is captured by reference, so the callback must not be invoked after
    // CGF is gone.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Restore the builder's original insert point when leaving the callback.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed on; may be null.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1248 
1249 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1250     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1251     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1252     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1253   assert(ThreadIDVar->getType()->isPointerType() &&
1254          "thread id variable must be of type kmp_int32 *");
1255   CodeGenFunction CGF(CGM, true);
1256   bool HasCancel = false;
1257   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1258     HasCancel = OPD->hasCancel();
1259   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1260     HasCancel = OPD->hasCancel();
1261   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1262     HasCancel = OPSD->hasCancel();
1263   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1264     HasCancel = OPFD->hasCancel();
1265   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1266     HasCancel = OPFD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD =
1270                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD =
1273                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275 
1276   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1277   //       parallel region to make cancellation barriers work properly.
1278   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1279   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1280   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1281                                     HasCancel, OutlinedHelperName);
1282   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1283   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1284 }
1285 
1286 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290   return emitParallelOrTeamsOutlinedFunction(
1291       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292 }
1293 
1294 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298   return emitParallelOrTeamsOutlinedFunction(
1299       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300 }
1301 
/// Emit the outlined function for a task or taskloop directive \p D.
///
/// \param ThreadIDVar    Thread id variable; must NOT be of pointer type.
/// \param PartIDVar      Passed to the untied-task action.
/// \param TaskTVar       Pointer-typed variable whose pointee is passed as the
///                       task argument of the __kmpc_omp_task call emitted by
///                       the untied-task callback.
/// \param Tied           Whether the task is tied.
/// \param NumberOfParts  Written only when \p Tied is false, from the action.
/// \returns the generated captured-statement function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread_id, task).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Directives not listed below are treated as having no cancel region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1348 
1349 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350                              const RecordDecl *RD, const CGRecordLayout &RL,
1351                              ArrayRef<llvm::Constant *> Data) {
1352   llvm::StructType *StructTy = RL.getLLVMType();
1353   unsigned PrevIdx = 0;
1354   ConstantInitBuilder CIBuilder(CGM);
1355   auto DI = Data.begin();
1356   for (const FieldDecl *FD : RD->fields()) {
1357     unsigned Idx = RL.getLLVMFieldNo(FD);
1358     // Fill the alignment.
1359     for (unsigned I = PrevIdx; I < Idx; ++I)
1360       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361     PrevIdx = Idx + 1;
1362     Fields.add(*DI);
1363     ++DI;
1364   }
1365 }
1366 
1367 template <class... As>
1368 static llvm::GlobalVariable *
1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1370                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1371                    As &&... Args) {
1372   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1373   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1374   ConstantInitBuilder CIBuilder(CGM);
1375   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1376   buildStructValue(Fields, CGM, RD, RL, Data);
1377   return Fields.finishAndCreateGlobal(
1378       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1379       std::forward<As>(Args)...);
1380 }
1381 
1382 template <typename T>
1383 static void
1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385                                          ArrayRef<llvm::Constant *> Data,
1386                                          T &Parent) {
1387   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390   buildStructValue(Fields, CGM, RD, RL, Data);
1391   Fields.finishAndAddTo(Parent);
1392 }
1393 
1394 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1395                                              bool AtCurrentPoint) {
1396   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1397   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1398 
1399   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1400   if (AtCurrentPoint) {
1401     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1402         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1403   } else {
1404     Elem.second.ServiceInsertPt =
1405         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1406     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1407   }
1408 }
1409 
1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412   if (Elem.second.ServiceInsertPt) {
1413     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414     Elem.second.ServiceInsertPt = nullptr;
1415     Ptr->eraseFromParent();
1416   }
1417 }
1418 
1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420                                                   SourceLocation Loc,
1421                                                   SmallString<128> &Buffer) {
1422   llvm::raw_svector_ostream OS(Buffer);
1423   // Build debug location
1424   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425   OS << ";" << PLoc.getFilename() << ";";
1426   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427     OS << FD->getQualifiedNameAsString();
1428   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429   return OS.str();
1430 }
1431 
1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433                                                  SourceLocation Loc,
1434                                                  unsigned Flags) {
1435   llvm::Constant *SrcLocStr;
1436   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437       Loc.isInvalid()) {
1438     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439   } else {
1440     std::string FunctionName = "";
1441     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442       FunctionName = FD->getQualifiedNameAsString();
1443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444     const char *FileName = PLoc.getFilename();
1445     unsigned Line = PLoc.getLine();
1446     unsigned Column = PLoc.getColumn();
1447     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448                                                 Line, Column);
1449   }
1450   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452                                      Reserved2Flags);
1453 }
1454 
1455 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1456                                           SourceLocation Loc) {
1457   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1458   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1459   // the clang invariants used below might be broken.
1460   if (CGM.getLangOpts().OpenMPIRBuilder) {
1461     SmallString<128> Buffer;
1462     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1463     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1464         getIdentStringFromSourceLocation(CGF, Loc, Buffer));
1465     return OMPBuilder.getOrCreateThreadID(
1466         OMPBuilder.getOrCreateIdent(SrcLocStr));
1467   }
1468 
1469   llvm::Value *ThreadID = nullptr;
1470   // Check whether we've already cached a load of the thread id in this
1471   // function.
1472   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1473   if (I != OpenMPLocThreadIDMap.end()) {
1474     ThreadID = I->second.ThreadID;
1475     if (ThreadID != nullptr)
1476       return ThreadID;
1477   }
1478   // If exceptions are enabled, do not use parameter to avoid possible crash.
1479   if (auto *OMPRegionInfo =
1480           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1481     if (OMPRegionInfo->getThreadIDVariable()) {
1482       // Check if this an outlined function with thread id passed as argument.
1483       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1484       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1485       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1486           !CGF.getLangOpts().CXXExceptions ||
1487           CGF.Builder.GetInsertBlock() == TopBlock ||
1488           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1489           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1490               TopBlock ||
1491           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1492               CGF.Builder.GetInsertBlock()) {
1493         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1494         // If value loaded in entry block, cache it and use it everywhere in
1495         // function.
1496         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1497           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1498           Elem.second.ThreadID = ThreadID;
1499         }
1500         return ThreadID;
1501       }
1502     }
1503   }
1504 
1505   // This is not an outlined function region - need to call __kmpc_int32
1506   // kmpc_global_thread_num(ident_t *loc).
1507   // Generate thread id value and cache this value for use across the
1508   // function.
1509   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1510   if (!Elem.second.ServiceInsertPt)
1511     setLocThreadIdInsertPt(CGF);
1512   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1513   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1514   llvm::CallInst *Call = CGF.Builder.CreateCall(
1515       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1516                                             OMPRTL___kmpc_global_thread_num),
1517       emitUpdateLocation(CGF, Loc));
1518   Call->setCallingConv(CGF.getRuntimeCC());
1519   Elem.second.ThreadID = Call;
1520   return Call;
1521 }
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
1544 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1545   return OMPBuilder.IdentPtr;
1546 }
1547 
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549   if (!Kmpc_MicroTy) {
1550     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554   }
1555   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563                                             : "__kmpc_for_static_init_4u")
1564                                 : (IVSigned ? "__kmpc_for_static_init_8"
1565                                             : "__kmpc_for_static_init_8u");
1566   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568   llvm::Type *TypeParams[] = {
1569     getIdentTyPointerTy(),                     // loc
1570     CGM.Int32Ty,                               // tid
1571     CGM.Int32Ty,                               // schedtype
1572     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573     PtrTy,                                     // p_lower
1574     PtrTy,                                     // p_upper
1575     PtrTy,                                     // p_stride
1576     ITy,                                       // incr
1577     ITy                                        // chunk
1578   };
1579   auto *FnTy =
1580       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581   return CGM.CreateRuntimeFunction(FnTy, Name);
1582 }
1583 
1584 llvm::FunctionCallee
1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586   assert((IVSize == 32 || IVSize == 64) &&
1587          "IV size is not compatible with the omp runtime");
1588   StringRef Name =
1589       IVSize == 32
1590           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594                                CGM.Int32Ty,           // tid
1595                                CGM.Int32Ty,           // schedtype
1596                                ITy,                   // lower
1597                                ITy,                   // upper
1598                                ITy,                   // stride
1599                                ITy                    // chunk
1600   };
1601   auto *FnTy =
1602       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603   return CGM.CreateRuntimeFunction(FnTy, Name);
1604 }
1605 
1606 llvm::FunctionCallee
1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608   assert((IVSize == 32 || IVSize == 64) &&
1609          "IV size is not compatible with the omp runtime");
1610   StringRef Name =
1611       IVSize == 32
1612           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614   llvm::Type *TypeParams[] = {
1615       getIdentTyPointerTy(), // loc
1616       CGM.Int32Ty,           // tid
1617   };
1618   auto *FnTy =
1619       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620   return CGM.CreateRuntimeFunction(FnTy, Name);
1621 }
1622 
1623 llvm::FunctionCallee
1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625   assert((IVSize == 32 || IVSize == 64) &&
1626          "IV size is not compatible with the omp runtime");
1627   StringRef Name =
1628       IVSize == 32
1629           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633   llvm::Type *TypeParams[] = {
1634     getIdentTyPointerTy(),                     // loc
1635     CGM.Int32Ty,                               // tid
1636     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637     PtrTy,                                     // p_lower
1638     PtrTy,                                     // p_upper
1639     PtrTy                                      // p_stride
1640   };
1641   auto *FnTy =
1642       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643   return CGM.CreateRuntimeFunction(FnTy, Name);
1644 }
1645 
1646 /// Obtain information that uniquely identifies a target entry. This
1647 /// consists of the file and device IDs as well as line number associated with
1648 /// the relevant entry source location.
1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1650                                      unsigned &DeviceID, unsigned &FileID,
1651                                      unsigned &LineNum) {
1652   SourceManager &SM = C.getSourceManager();
1653 
1654   // The loc should be always valid and have a file ID (the user cannot use
1655   // #pragma directives in macros)
1656 
1657   assert(Loc.isValid() && "Source location is expected to be always valid.");
1658 
1659   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1660   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1661 
1662   llvm::sys::fs::UniqueID ID;
1663   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1664     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1665     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1667       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1668           << PLoc.getFilename() << EC.message();
1669   }
1670 
1671   DeviceID = ID.getDevice();
1672   FileID = ID.getFile();
1673   LineNum = PLoc.getLine();
1674 }
1675 
1676 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1677   if (CGM.getLangOpts().OpenMPSimd)
1678     return Address::invalid();
1679   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1680       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1681   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1682               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1683                HasRequiresUnifiedSharedMemory))) {
1684     SmallString<64> PtrName;
1685     {
1686       llvm::raw_svector_ostream OS(PtrName);
1687       OS << CGM.getMangledName(GlobalDecl(VD));
1688       if (!VD->isExternallyVisible()) {
1689         unsigned DeviceID, FileID, Line;
1690         getTargetEntryUniqueInfo(CGM.getContext(),
1691                                  VD->getCanonicalDecl()->getBeginLoc(),
1692                                  DeviceID, FileID, Line);
1693         OS << llvm::format("_%x", FileID);
1694       }
1695       OS << "_decl_tgt_ref_ptr";
1696     }
1697     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1698     if (!Ptr) {
1699       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1700       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1701                                         PtrName);
1702 
1703       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1704       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1705 
1706       if (!CGM.getLangOpts().OpenMPIsDevice)
1707         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1708       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1709     }
1710     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1711   }
1712   return Address::invalid();
1713 }
1714 
1715 llvm::Constant *
1716 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1717   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1718          !CGM.getContext().getTargetInfo().isTLSSupported());
1719   // Lookup the entry, lazily creating it if necessary.
1720   std::string Suffix = getName({"cache", ""});
1721   return getOrCreateInternalVariable(
1722       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1723 }
1724 
1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1726                                                 const VarDecl *VD,
1727                                                 Address VDAddr,
1728                                                 SourceLocation Loc) {
1729   if (CGM.getLangOpts().OpenMPUseTLS &&
1730       CGM.getContext().getTargetInfo().isTLSSupported())
1731     return VDAddr;
1732 
1733   llvm::Type *VarTy = VDAddr.getElementType();
1734   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1735                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1736                                                        CGM.Int8PtrTy),
1737                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1738                          getOrCreateThreadPrivateCache(VD)};
1739   return Address(CGF.EmitRuntimeCall(
1740                      OMPBuilder.getOrCreateRuntimeFunction(
1741                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1742                      Args),
1743                  VDAddr.getAlignment());
1744 }
1745 
1746 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1747     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1748     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1749   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1750   // library.
1751   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1752   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1753                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1754                       OMPLoc);
1755   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1756   // to register constructor/destructor for variable.
1757   llvm::Value *Args[] = {
1758       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1759       Ctor, CopyCtor, Dtor};
1760   CGF.EmitRuntimeCall(
1761       OMPBuilder.getOrCreateRuntimeFunction(
1762           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1763       Args);
1764 }
1765 
1766 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1767     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1768     bool PerformInit, CodeGenFunction *CGF) {
1769   if (CGM.getLangOpts().OpenMPUseTLS &&
1770       CGM.getContext().getTargetInfo().isTLSSupported())
1771     return nullptr;
1772 
1773   VD = VD->getDefinition(CGM.getContext());
1774   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1775     QualType ASTTy = VD->getType();
1776 
1777     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1778     const Expr *Init = VD->getAnyInitializer();
1779     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1780       // Generate function that re-emits the declaration's initializer into the
1781       // threadprivate copy of the variable VD
1782       CodeGenFunction CtorCGF(CGM);
1783       FunctionArgList Args;
1784       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1785                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1786                             ImplicitParamDecl::Other);
1787       Args.push_back(&Dst);
1788 
1789       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1790           CGM.getContext().VoidPtrTy, Args);
1791       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1792       std::string Name = getName({"__kmpc_global_ctor_", ""});
1793       llvm::Function *Fn =
1794           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1795       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1796                             Args, Loc, Loc);
1797       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1798           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1799           CGM.getContext().VoidPtrTy, Dst.getLocation());
1800       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1801       Arg = CtorCGF.Builder.CreateElementBitCast(
1802           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1803       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1804                                /*IsInitializer=*/true);
1805       ArgVal = CtorCGF.EmitLoadOfScalar(
1806           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1807           CGM.getContext().VoidPtrTy, Dst.getLocation());
1808       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1809       CtorCGF.FinishFunction();
1810       Ctor = Fn;
1811     }
1812     if (VD->getType().isDestructedType() != QualType::DK_none) {
1813       // Generate function that emits destructor call for the threadprivate copy
1814       // of the variable VD
1815       CodeGenFunction DtorCGF(CGM);
1816       FunctionArgList Args;
1817       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1818                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1819                             ImplicitParamDecl::Other);
1820       Args.push_back(&Dst);
1821 
1822       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1823           CGM.getContext().VoidTy, Args);
1824       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1825       std::string Name = getName({"__kmpc_global_dtor_", ""});
1826       llvm::Function *Fn =
1827           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1828       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1829       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1830                             Loc, Loc);
1831       // Create a scope with an artificial location for the body of this function.
1832       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1833       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1834           DtorCGF.GetAddrOfLocalVar(&Dst),
1835           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1836       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1837                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1838                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1839       DtorCGF.FinishFunction();
1840       Dtor = Fn;
1841     }
1842     // Do not emit init function if it is not required.
1843     if (!Ctor && !Dtor)
1844       return nullptr;
1845 
1846     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1847     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1848                                                /*isVarArg=*/false)
1849                            ->getPointerTo();
1850     // Copying constructor for the threadprivate variable.
1851     // Must be NULL - reserved by runtime, but currently it requires that this
1852     // parameter is always NULL. Otherwise it fires assertion.
1853     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1854     if (Ctor == nullptr) {
1855       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1856                                              /*isVarArg=*/false)
1857                          ->getPointerTo();
1858       Ctor = llvm::Constant::getNullValue(CtorTy);
1859     }
1860     if (Dtor == nullptr) {
1861       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1862                                              /*isVarArg=*/false)
1863                          ->getPointerTo();
1864       Dtor = llvm::Constant::getNullValue(DtorTy);
1865     }
1866     if (!CGF) {
1867       auto *InitFunctionTy =
1868           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1869       std::string Name = getName({"__omp_threadprivate_init_", ""});
1870       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1871           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1872       CodeGenFunction InitCGF(CGM);
1873       FunctionArgList ArgList;
1874       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1875                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1876                             Loc, Loc);
1877       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1878       InitCGF.FinishFunction();
1879       return InitFunction;
1880     }
1881     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1882   }
1883   return nullptr;
1884 }
1885 
1886 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1887                                                      llvm::GlobalVariable *Addr,
1888                                                      bool PerformInit) {
1889   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1890       !CGM.getLangOpts().OpenMPIsDevice)
1891     return false;
1892   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1893       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1894   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1895       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1896        HasRequiresUnifiedSharedMemory))
1897     return CGM.getLangOpts().OpenMPIsDevice;
1898   VD = VD->getDefinition(CGM.getContext());
1899   assert(VD && "Unknown VarDecl");
1900 
1901   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1902     return CGM.getLangOpts().OpenMPIsDevice;
1903 
1904   QualType ASTTy = VD->getType();
1905   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1906 
1907   // Produce the unique prefix to identify the new target regions. We use
1908   // the source location of the variable declaration which we know to not
1909   // conflict with any target region.
1910   unsigned DeviceID;
1911   unsigned FileID;
1912   unsigned Line;
1913   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1914   SmallString<128> Buffer, Out;
1915   {
1916     llvm::raw_svector_ostream OS(Buffer);
1917     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1918        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1919   }
1920 
1921   const Expr *Init = VD->getAnyInitializer();
1922   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1923     llvm::Constant *Ctor;
1924     llvm::Constant *ID;
1925     if (CGM.getLangOpts().OpenMPIsDevice) {
1926       // Generate function that re-emits the declaration's initializer into
1927       // the threadprivate copy of the variable VD
1928       CodeGenFunction CtorCGF(CGM);
1929 
1930       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1931       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1932       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1933           FTy, Twine(Buffer, "_ctor"), FI, Loc);
1934       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1935       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1936                             FunctionArgList(), Loc, Loc);
1937       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1938       CtorCGF.EmitAnyExprToMem(Init,
1939                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
1940                                Init->getType().getQualifiers(),
1941                                /*IsInitializer=*/true);
1942       CtorCGF.FinishFunction();
1943       Ctor = Fn;
1944       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1945       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1946     } else {
1947       Ctor = new llvm::GlobalVariable(
1948           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1949           llvm::GlobalValue::PrivateLinkage,
1950           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1951       ID = Ctor;
1952     }
1953 
1954     // Register the information for the entry associated with the constructor.
1955     Out.clear();
1956     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1957         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1958         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1959   }
1960   if (VD->getType().isDestructedType() != QualType::DK_none) {
1961     llvm::Constant *Dtor;
1962     llvm::Constant *ID;
1963     if (CGM.getLangOpts().OpenMPIsDevice) {
1964       // Generate function that emits destructor call for the threadprivate
1965       // copy of the variable VD
1966       CodeGenFunction DtorCGF(CGM);
1967 
1968       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1969       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1970       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1971           FTy, Twine(Buffer, "_dtor"), FI, Loc);
1972       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1973       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1974                             FunctionArgList(), Loc, Loc);
1975       // Create a scope with an artificial location for the body of this
1976       // function.
1977       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1978       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
1979                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1980                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1981       DtorCGF.FinishFunction();
1982       Dtor = Fn;
1983       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1984       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
1985     } else {
1986       Dtor = new llvm::GlobalVariable(
1987           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1988           llvm::GlobalValue::PrivateLinkage,
1989           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1990       ID = Dtor;
1991     }
1992     // Register the information for the entry associated with the destructor.
1993     Out.clear();
1994     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1995         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1996         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1997   }
1998   return CGM.getLangOpts().OpenMPIsDevice;
1999 }
2000 
2001 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2002                                                           QualType VarType,
2003                                                           StringRef Name) {
2004   std::string Suffix = getName({"artificial", ""});
2005   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2006   llvm::Value *GAddr =
2007       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2008   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2009       CGM.getTarget().isTLSSupported()) {
2010     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2011     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2012   }
2013   std::string CacheSuffix = getName({"cache", ""});
2014   llvm::Value *Args[] = {
2015       emitUpdateLocation(CGF, SourceLocation()),
2016       getThreadID(CGF, SourceLocation()),
2017       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2018       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2019                                 /*isSigned=*/false),
2020       getOrCreateInternalVariable(
2021           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2022   return Address(
2023       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2024           CGF.EmitRuntimeCall(
2025               OMPBuilder.getOrCreateRuntimeFunction(
2026                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2027               Args),
2028           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2029       CGM.getContext().getTypeAlignInChars(VarType));
2030 }
2031 
2032 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2033                                    const RegionCodeGenTy &ThenGen,
2034                                    const RegionCodeGenTy &ElseGen) {
2035   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2036 
2037   // If the condition constant folds and can be elided, try to avoid emitting
2038   // the condition and the dead arm of the if/else.
2039   bool CondConstant;
2040   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2041     if (CondConstant)
2042       ThenGen(CGF);
2043     else
2044       ElseGen(CGF);
2045     return;
2046   }
2047 
2048   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2049   // emit the conditional branch.
2050   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2051   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2052   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2053   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2054 
2055   // Emit the 'then' code.
2056   CGF.EmitBlock(ThenBlock);
2057   ThenGen(CGF);
2058   CGF.EmitBranch(ContBlock);
2059   // Emit the 'else' code if present.
2060   // There is no need to emit line number for unconditional branch.
2061   (void)ApplyDebugLocation::CreateEmpty(CGF);
2062   CGF.EmitBlock(ElseBlock);
2063   ElseGen(CGF);
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBranch(ContBlock);
2067   // Emit the continuation block for code after the if.
2068   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2069 }
2070 
2071 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2072                                        llvm::Function *OutlinedFn,
2073                                        ArrayRef<llvm::Value *> CapturedVars,
2074                                        const Expr *IfCond) {
2075   if (!CGF.HaveInsertPoint())
2076     return;
2077   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2078   auto &M = CGM.getModule();
2079   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2082     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2083     llvm::Value *Args[] = {
2084         RTLoc,
2085         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2086         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2087     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2088     RealArgs.append(std::begin(Args), std::end(Args));
2089     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2090 
2091     llvm::FunctionCallee RTLFn =
2092         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2093     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2094   };
2095   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2096                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2097     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2098     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2099     // Build calls:
2100     // __kmpc_serialized_parallel(&Loc, GTid);
2101     llvm::Value *Args[] = {RTLoc, ThreadID};
2102     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2103                             M, OMPRTL___kmpc_serialized_parallel),
2104                         Args);
2105 
2106     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2107     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2108     Address ZeroAddrBound =
2109         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2110                                          /*Name=*/".bound.zero.addr");
2111     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2112     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2113     // ThreadId for serialized parallels is 0.
2114     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2115     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2116     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2117 
2118     // Ensure we do not inline the function. This is trivially true for the ones
2119     // passed to __kmpc_fork_call but the ones calles in serialized regions
2120     // could be inlined. This is not a perfect but it is closer to the invariant
2121     // we want, namely, every data environment starts with a new function.
2122     // TODO: We should pass the if condition to the runtime function and do the
2123     //       handling there. Much cleaner code.
2124     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2125     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2126 
2127     // __kmpc_end_serialized_parallel(&Loc, GTid);
2128     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2129     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2130                             M, OMPRTL___kmpc_end_serialized_parallel),
2131                         EndArgs);
2132   };
2133   if (IfCond) {
2134     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2135   } else {
2136     RegionCodeGenTy ThenRCG(ThenGen);
2137     ThenRCG(CGF);
2138   }
2139 }
2140 
2141 // If we're inside an (outlined) parallel region, use the region info's
2142 // thread-ID variable (it is passed in a first argument of the outlined function
2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2144 // regular serial code region, get thread ID by calling kmp_int32
2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2146 // return the address of that temp.
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2148                                              SourceLocation Loc) {
2149   if (auto *OMPRegionInfo =
2150           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2151     if (OMPRegionInfo->getThreadIDVariable())
2152       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2153 
2154   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2155   QualType Int32Ty =
2156       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2157   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2158   CGF.EmitStoreOfScalar(ThreadID,
2159                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2160 
2161   return ThreadIDTemp;
2162 }
2163 
2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2165     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2166   SmallString<256> Buffer;
2167   llvm::raw_svector_ostream Out(Buffer);
2168   Out << Name;
2169   StringRef RuntimeName = Out.str();
2170   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2171   if (Elem.second) {
2172     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2173            "OMP internal variable has different type than requested");
2174     return &*Elem.second;
2175   }
2176 
2177   return Elem.second = new llvm::GlobalVariable(
2178              CGM.getModule(), Ty, /*IsConstant*/ false,
2179              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2180              Elem.first(), /*InsertBefore=*/nullptr,
2181              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2182 }
2183 
2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2185   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2186   std::string Name = getName({Prefix, "var"});
2187   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2188 }
2189 
2190 namespace {
2191 /// Common pre(post)-action for different OpenMP constructs.
2192 class CommonActionTy final : public PrePostActionTy {
2193   llvm::FunctionCallee EnterCallee;
2194   ArrayRef<llvm::Value *> EnterArgs;
2195   llvm::FunctionCallee ExitCallee;
2196   ArrayRef<llvm::Value *> ExitArgs;
2197   bool Conditional;
2198   llvm::BasicBlock *ContBlock = nullptr;
2199 
2200 public:
2201   CommonActionTy(llvm::FunctionCallee EnterCallee,
2202                  ArrayRef<llvm::Value *> EnterArgs,
2203                  llvm::FunctionCallee ExitCallee,
2204                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2205       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2206         ExitArgs(ExitArgs), Conditional(Conditional) {}
2207   void Enter(CodeGenFunction &CGF) override {
2208     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2209     if (Conditional) {
2210       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2211       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2212       ContBlock = CGF.createBasicBlock("omp_if.end");
2213       // Generate the branch (If-stmt)
2214       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2215       CGF.EmitBlock(ThenBlock);
2216     }
2217   }
2218   void Done(CodeGenFunction &CGF) {
2219     // Emit the rest of blocks/branches
2220     CGF.EmitBranch(ContBlock);
2221     CGF.EmitBlock(ContBlock, true);
2222   }
2223   void Exit(CodeGenFunction &CGF) override {
2224     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2225   }
2226 };
2227 } // anonymous namespace
2228 
2229 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2230                                          StringRef CriticalName,
2231                                          const RegionCodeGenTy &CriticalOpGen,
2232                                          SourceLocation Loc, const Expr *Hint) {
2233   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2234   // CriticalOpGen();
2235   // __kmpc_end_critical(ident_t *, gtid, Lock);
2236   // Prepare arguments and build a call to __kmpc_critical
2237   if (!CGF.HaveInsertPoint())
2238     return;
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2240                          getCriticalRegionLock(CriticalName)};
2241   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2242                                                 std::end(Args));
2243   if (Hint) {
2244     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2245         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2246   }
2247   CommonActionTy Action(
2248       OMPBuilder.getOrCreateRuntimeFunction(
2249           CGM.getModule(),
2250           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2251       EnterArgs,
2252       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2253                                             OMPRTL___kmpc_end_critical),
2254       Args);
2255   CriticalOpGen.setAction(Action);
2256   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2257 }
2258 
2259 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2260                                        const RegionCodeGenTy &MasterOpGen,
2261                                        SourceLocation Loc) {
2262   if (!CGF.HaveInsertPoint())
2263     return;
2264   // if(__kmpc_master(ident_t *, gtid)) {
2265   //   MasterOpGen();
2266   //   __kmpc_end_master(ident_t *, gtid);
2267   // }
2268   // Prepare arguments and build a call to __kmpc_master
2269   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2270   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2271                             CGM.getModule(), OMPRTL___kmpc_master),
2272                         Args,
2273                         OMPBuilder.getOrCreateRuntimeFunction(
2274                             CGM.getModule(), OMPRTL___kmpc_end_master),
2275                         Args,
2276                         /*Conditional=*/true);
2277   MasterOpGen.setAction(Action);
2278   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2279   Action.Done(CGF);
2280 }
2281 
2282 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2283                                         SourceLocation Loc) {
2284   if (!CGF.HaveInsertPoint())
2285     return;
2286   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2287     OMPBuilder.createTaskyield(CGF.Builder);
2288   } else {
2289     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2290     llvm::Value *Args[] = {
2291         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2292         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2293     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2294                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2295                         Args);
2296   }
2297 
2298   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2299     Region->emitUntiedSwitch(CGF);
2300 }
2301 
2302 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2303                                           const RegionCodeGenTy &TaskgroupOpGen,
2304                                           SourceLocation Loc) {
2305   if (!CGF.HaveInsertPoint())
2306     return;
2307   // __kmpc_taskgroup(ident_t *, gtid);
2308   // TaskgroupOpGen();
2309   // __kmpc_end_taskgroup(ident_t *, gtid);
2310   // Prepare arguments and build a call to __kmpc_taskgroup
2311   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2312   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2314                         Args,
2315                         OMPBuilder.getOrCreateRuntimeFunction(
2316                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2317                         Args);
2318   TaskgroupOpGen.setAction(Action);
2319   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2320 }
2321 
2322 /// Given an array of pointers to variables, project the address of a
2323 /// given variable.
2324 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2325                                       unsigned Index, const VarDecl *Var) {
2326   // Pull out the pointer to the variable.
2327   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2328   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2329 
2330   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2331   Addr = CGF.Builder.CreateElementBitCast(
2332       Addr, CGF.ConvertTypeForMem(Var->getType()));
2333   return Addr;
2334 }
2335 
2336 static llvm::Value *emitCopyprivateCopyFunction(
2337     CodeGenModule &CGM, llvm::Type *ArgsType,
2338     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2339     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2340     SourceLocation Loc) {
2341   ASTContext &C = CGM.getContext();
2342   // void copy_func(void *LHSArg, void *RHSArg);
2343   FunctionArgList Args;
2344   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2345                            ImplicitParamDecl::Other);
2346   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2347                            ImplicitParamDecl::Other);
2348   Args.push_back(&LHSArg);
2349   Args.push_back(&RHSArg);
2350   const auto &CGFI =
2351       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2352   std::string Name =
2353       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2354   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2355                                     llvm::GlobalValue::InternalLinkage, Name,
2356                                     &CGM.getModule());
2357   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2358   Fn->setDoesNotRecurse();
2359   CodeGenFunction CGF(CGM);
2360   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2361   // Dest = (void*[n])(LHSArg);
2362   // Src = (void*[n])(RHSArg);
2363   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2364       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2365       ArgsType), CGF.getPointerAlign());
2366   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2367       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2368       ArgsType), CGF.getPointerAlign());
2369   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2370   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2371   // ...
2372   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2373   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2374     const auto *DestVar =
2375         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2376     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2377 
2378     const auto *SrcVar =
2379         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2380     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2381 
2382     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2383     QualType Type = VD->getType();
2384     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2385   }
2386   CGF.FinishFunction();
2387   return Fn;
2388 }
2389 
2390 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2391                                        const RegionCodeGenTy &SingleOpGen,
2392                                        SourceLocation Loc,
2393                                        ArrayRef<const Expr *> CopyprivateVars,
2394                                        ArrayRef<const Expr *> SrcExprs,
2395                                        ArrayRef<const Expr *> DstExprs,
2396                                        ArrayRef<const Expr *> AssignmentOps) {
2397   if (!CGF.HaveInsertPoint())
2398     return;
2399   assert(CopyprivateVars.size() == SrcExprs.size() &&
2400          CopyprivateVars.size() == DstExprs.size() &&
2401          CopyprivateVars.size() == AssignmentOps.size());
2402   ASTContext &C = CGM.getContext();
2403   // int32 did_it = 0;
2404   // if(__kmpc_single(ident_t *, gtid)) {
2405   //   SingleOpGen();
2406   //   __kmpc_end_single(ident_t *, gtid);
2407   //   did_it = 1;
2408   // }
2409   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2410   // <copy_func>, did_it);
2411 
2412   Address DidIt = Address::invalid();
2413   if (!CopyprivateVars.empty()) {
2414     // int32 did_it = 0;
2415     QualType KmpInt32Ty =
2416         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2417     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2418     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2419   }
2420   // Prepare arguments and build a call to __kmpc_single
2421   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2422   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2423                             CGM.getModule(), OMPRTL___kmpc_single),
2424                         Args,
2425                         OMPBuilder.getOrCreateRuntimeFunction(
2426                             CGM.getModule(), OMPRTL___kmpc_end_single),
2427                         Args,
2428                         /*Conditional=*/true);
2429   SingleOpGen.setAction(Action);
2430   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2431   if (DidIt.isValid()) {
2432     // did_it = 1;
2433     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2434   }
2435   Action.Done(CGF);
2436   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2437   // <copy_func>, did_it);
2438   if (DidIt.isValid()) {
2439     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2440     QualType CopyprivateArrayTy = C.getConstantArrayType(
2441         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2442         /*IndexTypeQuals=*/0);
2443     // Create a list of all private variables for copyprivate.
2444     Address CopyprivateList =
2445         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2446     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2447       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2448       CGF.Builder.CreateStore(
2449           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2450               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2451               CGF.VoidPtrTy),
2452           Elem);
2453     }
2454     // Build function that copies private values from single region to all other
2455     // threads in the corresponding parallel region.
2456     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2457         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2458         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2459     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2460     Address CL =
2461       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2462                                                       CGF.VoidPtrTy);
2463     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2464     llvm::Value *Args[] = {
2465         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2466         getThreadID(CGF, Loc),        // i32 <gtid>
2467         BufSize,                      // size_t <buf_size>
2468         CL.getPointer(),              // void *<copyprivate list>
2469         CpyFn,                        // void (*) (void *, void *) <copy_func>
2470         DidItVal                      // i32 did_it
2471     };
2472     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2473                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2474                         Args);
2475   }
2476 }
2477 
2478 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2479                                         const RegionCodeGenTy &OrderedOpGen,
2480                                         SourceLocation Loc, bool IsThreads) {
2481   if (!CGF.HaveInsertPoint())
2482     return;
2483   // __kmpc_ordered(ident_t *, gtid);
2484   // OrderedOpGen();
2485   // __kmpc_end_ordered(ident_t *, gtid);
2486   // Prepare arguments and build a call to __kmpc_ordered
2487   if (IsThreads) {
2488     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2489     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2490                               CGM.getModule(), OMPRTL___kmpc_ordered),
2491                           Args,
2492                           OMPBuilder.getOrCreateRuntimeFunction(
2493                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2494                           Args);
2495     OrderedOpGen.setAction(Action);
2496     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2497     return;
2498   }
2499   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2500 }
2501 
2502 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2503   unsigned Flags;
2504   if (Kind == OMPD_for)
2505     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2506   else if (Kind == OMPD_sections)
2507     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2508   else if (Kind == OMPD_single)
2509     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2510   else if (Kind == OMPD_barrier)
2511     Flags = OMP_IDENT_BARRIER_EXPL;
2512   else
2513     Flags = OMP_IDENT_BARRIER_IMPL;
2514   return Flags;
2515 }
2516 
2517 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2518     CodeGenFunction &CGF, const OMPLoopDirective &S,
2519     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2520   // Check if the loop directive is actually a doacross loop directive. In this
2521   // case choose static, 1 schedule.
2522   if (llvm::any_of(
2523           S.getClausesOfKind<OMPOrderedClause>(),
2524           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2525     ScheduleKind = OMPC_SCHEDULE_static;
2526     // Chunk size is 1 in this case.
2527     llvm::APInt ChunkSize(32, 1);
2528     ChunkExpr = IntegerLiteral::Create(
2529         CGF.getContext(), ChunkSize,
2530         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2531         SourceLocation());
2532   }
2533 }
2534 
2535 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2536                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2537                                       bool ForceSimpleCall) {
2538   // Check if we should use the OMPBuilder
2539   auto *OMPRegionInfo =
2540       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2541   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2542     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2543         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2544     return;
2545   }
2546 
2547   if (!CGF.HaveInsertPoint())
2548     return;
2549   // Build call __kmpc_cancel_barrier(loc, thread_id);
2550   // Build call __kmpc_barrier(loc, thread_id);
2551   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2552   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2553   // thread_id);
2554   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2555                          getThreadID(CGF, Loc)};
2556   if (OMPRegionInfo) {
2557     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2558       llvm::Value *Result = CGF.EmitRuntimeCall(
2559           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2560                                                 OMPRTL___kmpc_cancel_barrier),
2561           Args);
2562       if (EmitChecks) {
2563         // if (__kmpc_cancel_barrier()) {
2564         //   exit from construct;
2565         // }
2566         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2567         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2568         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2569         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2570         CGF.EmitBlock(ExitBB);
2571         //   exit from construct;
2572         CodeGenFunction::JumpDest CancelDestination =
2573             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2574         CGF.EmitBranchThroughCleanup(CancelDestination);
2575         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2576       }
2577       return;
2578     }
2579   }
2580   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2581                           CGM.getModule(), OMPRTL___kmpc_barrier),
2582                       Args);
2583 }
2584 
2585 /// Map the OpenMP loop schedule to the runtime enumeration.
2586 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2587                                           bool Chunked, bool Ordered) {
2588   switch (ScheduleKind) {
2589   case OMPC_SCHEDULE_static:
2590     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2591                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2592   case OMPC_SCHEDULE_dynamic:
2593     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2594   case OMPC_SCHEDULE_guided:
2595     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2596   case OMPC_SCHEDULE_runtime:
2597     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2598   case OMPC_SCHEDULE_auto:
2599     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2600   case OMPC_SCHEDULE_unknown:
2601     assert(!Chunked && "chunk was specified but schedule kind not known");
2602     return Ordered ? OMP_ord_static : OMP_sch_static;
2603   }
2604   llvm_unreachable("Unexpected runtime schedule");
2605 }
2606 
2607 /// Map the OpenMP distribute schedule to the runtime enumeration.
2608 static OpenMPSchedType
2609 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2610   // only static is allowed for dist_schedule
2611   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2612 }
2613 
2614 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2615                                          bool Chunked) const {
2616   OpenMPSchedType Schedule =
2617       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2618   return Schedule == OMP_sch_static;
2619 }
2620 
2621 bool CGOpenMPRuntime::isStaticNonchunked(
2622     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2623   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2624   return Schedule == OMP_dist_sch_static;
2625 }
2626 
2627 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2628                                       bool Chunked) const {
2629   OpenMPSchedType Schedule =
2630       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2631   return Schedule == OMP_sch_static_chunked;
2632 }
2633 
2634 bool CGOpenMPRuntime::isStaticChunked(
2635     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2636   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2637   return Schedule == OMP_dist_sch_static_chunked;
2638 }
2639 
2640 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2641   OpenMPSchedType Schedule =
2642       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2643   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2644   return Schedule != OMP_sch_static;
2645 }
2646 
2647 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2648                                   OpenMPScheduleClauseModifier M1,
2649                                   OpenMPScheduleClauseModifier M2) {
2650   int Modifier = 0;
2651   switch (M1) {
2652   case OMPC_SCHEDULE_MODIFIER_monotonic:
2653     Modifier = OMP_sch_modifier_monotonic;
2654     break;
2655   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2656     Modifier = OMP_sch_modifier_nonmonotonic;
2657     break;
2658   case OMPC_SCHEDULE_MODIFIER_simd:
2659     if (Schedule == OMP_sch_static_chunked)
2660       Schedule = OMP_sch_static_balanced_chunked;
2661     break;
2662   case OMPC_SCHEDULE_MODIFIER_last:
2663   case OMPC_SCHEDULE_MODIFIER_unknown:
2664     break;
2665   }
2666   switch (M2) {
2667   case OMPC_SCHEDULE_MODIFIER_monotonic:
2668     Modifier = OMP_sch_modifier_monotonic;
2669     break;
2670   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2671     Modifier = OMP_sch_modifier_nonmonotonic;
2672     break;
2673   case OMPC_SCHEDULE_MODIFIER_simd:
2674     if (Schedule == OMP_sch_static_chunked)
2675       Schedule = OMP_sch_static_balanced_chunked;
2676     break;
2677   case OMPC_SCHEDULE_MODIFIER_last:
2678   case OMPC_SCHEDULE_MODIFIER_unknown:
2679     break;
2680   }
2681   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2682   // If the static schedule kind is specified or if the ordered clause is
2683   // specified, and if the nonmonotonic modifier is not specified, the effect is
2684   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2685   // modifier is specified, the effect is as if the nonmonotonic modifier is
2686   // specified.
2687   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2688     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2689           Schedule == OMP_sch_static_balanced_chunked ||
2690           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2691           Schedule == OMP_dist_sch_static_chunked ||
2692           Schedule == OMP_dist_sch_static))
2693       Modifier = OMP_sch_modifier_nonmonotonic;
2694   }
2695   return Schedule | Modifier;
2696 }
2697 
2698 void CGOpenMPRuntime::emitForDispatchInit(
2699     CodeGenFunction &CGF, SourceLocation Loc,
2700     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2701     bool Ordered, const DispatchRTInput &DispatchValues) {
2702   if (!CGF.HaveInsertPoint())
2703     return;
2704   OpenMPSchedType Schedule = getRuntimeSchedule(
2705       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2706   assert(Ordered ||
2707          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2708           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2709           Schedule != OMP_sch_static_balanced_chunked));
2710   // Call __kmpc_dispatch_init(
2711   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2712   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2713   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2714 
2715   // If the Chunk was not specified in the clause - use default value 1.
2716   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2717                                             : CGF.Builder.getIntN(IVSize, 1);
2718   llvm::Value *Args[] = {
2719       emitUpdateLocation(CGF, Loc),
2720       getThreadID(CGF, Loc),
2721       CGF.Builder.getInt32(addMonoNonMonoModifier(
2722           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2723       DispatchValues.LB,                                     // Lower
2724       DispatchValues.UB,                                     // Upper
2725       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2726       Chunk                                                  // Chunk
2727   };
2728   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2729 }
2730 
2731 static void emitForStaticInitCall(
2732     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2733     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2734     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2735     const CGOpenMPRuntime::StaticRTInput &Values) {
2736   if (!CGF.HaveInsertPoint())
2737     return;
2738 
2739   assert(!Values.Ordered);
2740   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2741          Schedule == OMP_sch_static_balanced_chunked ||
2742          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2743          Schedule == OMP_dist_sch_static ||
2744          Schedule == OMP_dist_sch_static_chunked);
2745 
2746   // Call __kmpc_for_static_init(
2747   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2748   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2749   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2750   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2751   llvm::Value *Chunk = Values.Chunk;
2752   if (Chunk == nullptr) {
2753     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2754             Schedule == OMP_dist_sch_static) &&
2755            "expected static non-chunked schedule");
2756     // If the Chunk was not specified in the clause - use default value 1.
2757     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2758   } else {
2759     assert((Schedule == OMP_sch_static_chunked ||
2760             Schedule == OMP_sch_static_balanced_chunked ||
2761             Schedule == OMP_ord_static_chunked ||
2762             Schedule == OMP_dist_sch_static_chunked) &&
2763            "expected static chunked schedule");
2764   }
2765   llvm::Value *Args[] = {
2766       UpdateLocation,
2767       ThreadId,
2768       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2769                                                   M2)), // Schedule type
2770       Values.IL.getPointer(),                           // &isLastIter
2771       Values.LB.getPointer(),                           // &LB
2772       Values.UB.getPointer(),                           // &UB
2773       Values.ST.getPointer(),                           // &Stride
2774       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2775       Chunk                                             // Chunk
2776   };
2777   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2778 }
2779 
2780 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2781                                         SourceLocation Loc,
2782                                         OpenMPDirectiveKind DKind,
2783                                         const OpenMPScheduleTy &ScheduleKind,
2784                                         const StaticRTInput &Values) {
2785   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2786       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2787   assert(isOpenMPWorksharingDirective(DKind) &&
2788          "Expected loop-based or sections-based directive.");
2789   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2790                                              isOpenMPLoopDirective(DKind)
2791                                                  ? OMP_IDENT_WORK_LOOP
2792                                                  : OMP_IDENT_WORK_SECTIONS);
2793   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2794   llvm::FunctionCallee StaticInitFunction =
2795       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2796   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2797   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2798                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2799 }
2800 
2801 void CGOpenMPRuntime::emitDistributeStaticInit(
2802     CodeGenFunction &CGF, SourceLocation Loc,
2803     OpenMPDistScheduleClauseKind SchedKind,
2804     const CGOpenMPRuntime::StaticRTInput &Values) {
2805   OpenMPSchedType ScheduleNum =
2806       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2807   llvm::Value *UpdatedLocation =
2808       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2809   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2810   llvm::FunctionCallee StaticInitFunction =
2811       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2812   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2813                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2814                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2815 }
2816 
2817 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2818                                           SourceLocation Loc,
2819                                           OpenMPDirectiveKind DKind) {
2820   if (!CGF.HaveInsertPoint())
2821     return;
2822   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2823   llvm::Value *Args[] = {
2824       emitUpdateLocation(CGF, Loc,
2825                          isOpenMPDistributeDirective(DKind)
2826                              ? OMP_IDENT_WORK_DISTRIBUTE
2827                              : isOpenMPLoopDirective(DKind)
2828                                    ? OMP_IDENT_WORK_LOOP
2829                                    : OMP_IDENT_WORK_SECTIONS),
2830       getThreadID(CGF, Loc)};
2831   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2832   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2833                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2834                       Args);
2835 }
2836 
2837 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2838                                                  SourceLocation Loc,
2839                                                  unsigned IVSize,
2840                                                  bool IVSigned) {
2841   if (!CGF.HaveInsertPoint())
2842     return;
2843   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2844   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2845   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2846 }
2847 
2848 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2849                                           SourceLocation Loc, unsigned IVSize,
2850                                           bool IVSigned, Address IL,
2851                                           Address LB, Address UB,
2852                                           Address ST) {
2853   // Call __kmpc_dispatch_next(
2854   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2855   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2856   //          kmp_int[32|64] *p_stride);
2857   llvm::Value *Args[] = {
2858       emitUpdateLocation(CGF, Loc),
2859       getThreadID(CGF, Loc),
2860       IL.getPointer(), // &isLastIter
2861       LB.getPointer(), // &Lower
2862       UB.getPointer(), // &Upper
2863       ST.getPointer()  // &Stride
2864   };
2865   llvm::Value *Call =
2866       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2867   return CGF.EmitScalarConversion(
2868       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2869       CGF.getContext().BoolTy, Loc);
2870 }
2871 
2872 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2873                                            llvm::Value *NumThreads,
2874                                            SourceLocation Loc) {
2875   if (!CGF.HaveInsertPoint())
2876     return;
2877   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2878   llvm::Value *Args[] = {
2879       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2880       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2881   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2882                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2883                       Args);
2884 }
2885 
2886 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2887                                          ProcBindKind ProcBind,
2888                                          SourceLocation Loc) {
2889   if (!CGF.HaveInsertPoint())
2890     return;
2891   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2892   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2893   llvm::Value *Args[] = {
2894       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2895       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2896   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2897                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2898                       Args);
2899 }
2900 
2901 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2902                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2903   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2904     OMPBuilder.createFlush(CGF.Builder);
2905   } else {
2906     if (!CGF.HaveInsertPoint())
2907       return;
2908     // Build call void __kmpc_flush(ident_t *loc)
2909     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                             CGM.getModule(), OMPRTL___kmpc_flush),
2911                         emitUpdateLocation(CGF, Loc));
2912   }
2913 }
2914 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so each one equals its position
/// in this list, starting at 0 for KmpTaskTShareds.
/// NOTE(review): the order presumably has to match the order in which the
/// kmp_task_t record fields are created elsewhere in this file - confirm
/// before reordering or inserting enumerators.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// NOTE(review): unlike its siblings this enumerator has a generic name;
  /// presumably the underlying field is a multi-purpose data slot - confirm.
  Data1,
  /// Task priority.
  /// NOTE(review): generic name as well, see Data1.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2940 
2941 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2942   return OffloadEntriesTargetRegion.empty() &&
2943          OffloadEntriesDeviceGlobalVar.empty();
2944 }
2945 
2946 /// Initialize target region entry.
2947 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2948     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2949                                     StringRef ParentName, unsigned LineNum,
2950                                     unsigned Order) {
2951   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2952                                              "only required for the device "
2953                                              "code generation.");
2954   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2955       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2956                                    OMPTargetRegionEntryTargetRegion);
2957   ++OffloadingEntriesNum;
2958 }
2959 
2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2961     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962                                   StringRef ParentName, unsigned LineNum,
2963                                   llvm::Constant *Addr, llvm::Constant *ID,
2964                                   OMPTargetRegionEntryKind Flags) {
2965   // If we are emitting code for a target, the entry is already initialized,
2966   // only has to be registered.
2967   if (CGM.getLangOpts().OpenMPIsDevice) {
2968     // This could happen if the device compilation is invoked standalone.
2969     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2970       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2971                                       OffloadingEntriesNum);
2972     auto &Entry =
2973         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2974     Entry.setAddress(Addr);
2975     Entry.setID(ID);
2976     Entry.setFlags(Flags);
2977   } else {
2978     if (Flags ==
2979             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2980         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2981                                  /*IgnoreAddressId*/ true))
2982       return;
2983     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2984            "Target region entry already registered!");
2985     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2986     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2987     ++OffloadingEntriesNum;
2988   }
2989 }
2990 
2991 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2992     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2993     bool IgnoreAddressId) const {
2994   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2995   if (PerDevice == OffloadEntriesTargetRegion.end())
2996     return false;
2997   auto PerFile = PerDevice->second.find(FileID);
2998   if (PerFile == PerDevice->second.end())
2999     return false;
3000   auto PerParentName = PerFile->second.find(ParentName);
3001   if (PerParentName == PerFile->second.end())
3002     return false;
3003   auto PerLine = PerParentName->second.find(LineNum);
3004   if (PerLine == PerParentName->second.end())
3005     return false;
3006   // Fail if this entry is already registered.
3007   if (!IgnoreAddressId &&
3008       (PerLine->second.getAddress() || PerLine->second.getID()))
3009     return false;
3010   return true;
3011 }
3012 
3013 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3014     const OffloadTargetRegionEntryInfoActTy &Action) {
3015   // Scan all target region entries and perform the provided action.
3016   for (const auto &D : OffloadEntriesTargetRegion)
3017     for (const auto &F : D.second)
3018       for (const auto &P : F.second)
3019         for (const auto &L : P.second)
3020           Action(D.first, F.first, P.first(), L.first, L.second);
3021 }
3022 
3023 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3024     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3025                                        OMPTargetGlobalVarEntryKind Flags,
3026                                        unsigned Order) {
3027   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3028                                              "only required for the device "
3029                                              "code generation.");
3030   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3031   ++OffloadingEntriesNum;
3032 }
3033 
3034 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3035     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3036                                      CharUnits VarSize,
3037                                      OMPTargetGlobalVarEntryKind Flags,
3038                                      llvm::GlobalValue::LinkageTypes Linkage) {
3039   if (CGM.getLangOpts().OpenMPIsDevice) {
3040     // This could happen if the device compilation is invoked standalone.
3041     if (!hasDeviceGlobalVarEntryInfo(VarName))
3042       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3043     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3044     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3045            "Resetting with the new address.");
3046     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3047       if (Entry.getVarSize().isZero()) {
3048         Entry.setVarSize(VarSize);
3049         Entry.setLinkage(Linkage);
3050       }
3051       return;
3052     }
3053     Entry.setVarSize(VarSize);
3054     Entry.setLinkage(Linkage);
3055     Entry.setAddress(Addr);
3056   } else {
3057     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3058       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3059       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3060              "Entry not initialized!");
3061       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3062              "Resetting with the new address.");
3063       if (Entry.getVarSize().isZero()) {
3064         Entry.setVarSize(VarSize);
3065         Entry.setLinkage(Linkage);
3066       }
3067       return;
3068     }
3069     OffloadEntriesDeviceGlobalVar.try_emplace(
3070         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3071     ++OffloadingEntriesNum;
3072   }
3073 }
3074 
3075 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3076     actOnDeviceGlobalVarEntriesInfo(
3077         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3078   // Scan all target region entries and perform the provided action.
3079   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3080     Action(E.getKey(), E.getValue());
3081 }
3082 
3083 void CGOpenMPRuntime::createOffloadEntry(
3084     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3085     llvm::GlobalValue::LinkageTypes Linkage) {
3086   StringRef Name = Addr->getName();
3087   llvm::Module &M = CGM.getModule();
3088   llvm::LLVMContext &C = M.getContext();
3089 
3090   // Create constant string with the name.
3091   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3092 
3093   std::string StringName = getName({"omp_offloading", "entry_name"});
3094   auto *Str = new llvm::GlobalVariable(
3095       M, StrPtrInit->getType(), /*isConstant=*/true,
3096       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3097   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3098 
3099   llvm::Constant *Data[] = {
3100       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3101       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3102       llvm::ConstantInt::get(CGM.SizeTy, Size),
3103       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3104       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3105   std::string EntryName = getName({"omp_offloading", "entry", ""});
3106   llvm::GlobalVariable *Entry = createGlobalStruct(
3107       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3108       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3109 
3110   // The entry has to be created in the section the linker expects it to be.
3111   Entry->setSection("omp_offloading_entries");
3112 }
3113 
/// Emits the offload entries and the `omp_offload.info` named metadata for
/// everything recorded in OffloadEntriesInfoManager.
///
/// Does nothing in OpenMP simd-only mode or when the manager is empty.
/// Otherwise one metadata node is appended per target region entry and per
/// device global variable entry; the entries are then visited by their order
/// index and, for each one, either a diagnostic is reported, the entry is
/// skipped, or createOffloadEntry() is called.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by an entry's order (E.getOrder()) and are sized
  // with OffloadEntriesInfoManager.size().
  // OrderedEntries holds (entry, source location, name) tuples;
  // ParentFunctions holds the parent function name of target region entries.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file whose unique ID matches (DeviceID, FileID) and
        // translate (file, Line, column 1) into a source location. Loc stays
        // default-constructed when no file matches.
        // Note: the end iterator `E` declared in the loop header shadows the
        // entry parameter `E` inside the loop only; after the loop `E` refers
        // to the entry again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // No source location is recorded for global variables.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Visit the entries by order index and create the actual offload entries.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are registered with size 0 and weak-any linkage.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // On the device, no entry is created when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // Expected state: no address on the device, an address on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3287 
3288 /// Loads all the offload entries information from the host IR
3289 /// metadata.
3290 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3291   // If we are in target mode, load the metadata from the host IR. This code has
3292   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3293 
3294   if (!CGM.getLangOpts().OpenMPIsDevice)
3295     return;
3296 
3297   if (CGM.getLangOpts().OMPHostIRFile.empty())
3298     return;
3299 
3300   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3301   if (auto EC = Buf.getError()) {
3302     CGM.getDiags().Report(diag::err_cannot_open_file)
3303         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3304     return;
3305   }
3306 
3307   llvm::LLVMContext C;
3308   auto ME = expectedToErrorOrAndEmitErrors(
3309       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3310 
3311   if (auto EC = ME.getError()) {
3312     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3313         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3314     CGM.getDiags().Report(DiagID)
3315         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3316     return;
3317   }
3318 
3319   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3320   if (!MD)
3321     return;
3322 
3323   for (llvm::MDNode *MN : MD->operands()) {
3324     auto &&GetMDInt = [MN](unsigned Idx) {
3325       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3326       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3327     };
3328 
3329     auto &&GetMDString = [MN](unsigned Idx) {
3330       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3331       return V->getString();
3332     };
3333 
3334     switch (GetMDInt(0)) {
3335     default:
3336       llvm_unreachable("Unexpected metadata!");
3337       break;
3338     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3339         OffloadingEntryInfoTargetRegion:
3340       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3341           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3342           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3343           /*Order=*/GetMDInt(5));
3344       break;
3345     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3346         OffloadingEntryInfoDeviceGlobalVar:
3347       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3348           /*MangledName=*/GetMDString(1),
3349           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3350               /*Flags=*/GetMDInt(2)),
3351           /*Order=*/GetMDInt(3));
3352       break;
3353     }
3354   }
3355 }
3356 
3357 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3358   if (!KmpRoutineEntryPtrTy) {
3359     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3360     ASTContext &C = CGM.getContext();
3361     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3362     FunctionProtoType::ExtProtoInfo EPI;
3363     KmpRoutineEntryPtrQTy = C.getPointerType(
3364         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3365     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3366   }
3367 }
3368 
3369 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3370   // Make sure the type of the entry is already created. This is the type we
3371   // have to create:
3372   // struct __tgt_offload_entry{
3373   //   void      *addr;       // Pointer to the offload entry info.
3374   //                          // (function or global)
3375   //   char      *name;       // Name of the function or global.
3376   //   size_t     size;       // Size of the entry info (0 if it a function).
3377   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3378   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3379   // };
3380   if (TgtOffloadEntryQTy.isNull()) {
3381     ASTContext &C = CGM.getContext();
3382     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3383     RD->startDefinition();
3384     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3385     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3386     addFieldToRecordDecl(C, RD, C.getSizeType());
3387     addFieldToRecordDecl(
3388         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3389     addFieldToRecordDecl(
3390         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3391     RD->completeDefinition();
3392     RD->addAttr(PackedAttr::CreateImplicit(C));
3393     TgtOffloadEntryQTy = C.getRecordType(RD);
3394   }
3395   return TgtOffloadEntryQTy;
3396 }
3397 
3398 namespace {
3399 struct PrivateHelpersTy {
3400   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3401                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3402       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3403         PrivateElemInit(PrivateElemInit) {}
3404   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3405   const Expr *OriginalRef = nullptr;
3406   const VarDecl *Original = nullptr;
3407   const VarDecl *PrivateCopy = nullptr;
3408   const VarDecl *PrivateElemInit = nullptr;
3409   bool isLocalPrivate() const {
3410     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3411   }
3412 };
3413 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3414 } // anonymous namespace
3415 
3416 static bool isAllocatableDecl(const VarDecl *VD) {
3417   const VarDecl *CVD = VD->getCanonicalDecl();
3418   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3419     return false;
3420   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3421   // Use the default allocation.
3422   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3423             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3424            !AA->getAllocator());
3425 }
3426 
3427 static RecordDecl *
3428 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3429   if (!Privates.empty()) {
3430     ASTContext &C = CGM.getContext();
3431     // Build struct .kmp_privates_t. {
3432     //         /*  private vars  */
3433     //       };
3434     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3435     RD->startDefinition();
3436     for (const auto &Pair : Privates) {
3437       const VarDecl *VD = Pair.second.Original;
3438       QualType Type = VD->getType().getNonReferenceType();
3439       // If the private variable is a local variable with lvalue ref type,
3440       // allocate the pointer instead of the pointee type.
3441       if (Pair.second.isLocalPrivate()) {
3442         if (VD->getType()->isLValueReferenceType())
3443           Type = C.getPointerType(Type);
3444         if (isAllocatableDecl(VD))
3445           Type = C.getPointerType(Type);
3446       }
3447       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3448       if (VD->hasAttrs()) {
3449         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3450              E(VD->getAttrs().end());
3451              I != E; ++I)
3452           FD->addAttr(*I);
3453       }
3454     }
3455     RD->completeDefinition();
3456     return RD;
3457   }
3458   return nullptr;
3459 }
3460 
3461 static RecordDecl *
3462 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3463                          QualType KmpInt32Ty,
3464                          QualType KmpRoutineEntryPointerQTy) {
3465   ASTContext &C = CGM.getContext();
3466   // Build struct kmp_task_t {
3467   //         void *              shareds;
3468   //         kmp_routine_entry_t routine;
3469   //         kmp_int32           part_id;
3470   //         kmp_cmplrdata_t data1;
3471   //         kmp_cmplrdata_t data2;
3472   // For taskloops additional fields:
3473   //         kmp_uint64          lb;
3474   //         kmp_uint64          ub;
3475   //         kmp_int64           st;
3476   //         kmp_int32           liter;
3477   //         void *              reductions;
3478   //       };
3479   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3480   UD->startDefinition();
3481   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3482   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3483   UD->completeDefinition();
3484   QualType KmpCmplrdataTy = C.getRecordType(UD);
3485   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3486   RD->startDefinition();
3487   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3488   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3489   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3490   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3491   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3492   if (isOpenMPTaskLoopDirective(Kind)) {
3493     QualType KmpUInt64Ty =
3494         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3495     QualType KmpInt64Ty =
3496         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3497     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3498     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3499     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3500     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3501     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3502   }
3503   RD->completeDefinition();
3504   return RD;
3505 }
3506 
3507 static RecordDecl *
3508 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3509                                      ArrayRef<PrivateDataTy> Privates) {
3510   ASTContext &C = CGM.getContext();
3511   // Build struct kmp_task_t_with_privates {
3512   //         kmp_task_t task_data;
3513   //         .kmp_privates_t. privates;
3514   //       };
3515   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3516   RD->startDefinition();
3517   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3518   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3519     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3520   RD->completeDefinition();
3521   return RD;
3522 }
3523 
3524 /// Emit a proxy function which accepts kmp_task_t as the second
3525 /// argument.
3526 /// \code
3527 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3528 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3529 ///   For taskloops:
3530 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3531 ///   tt->reductions, tt->shareds);
3532 ///   return 0;
3533 /// }
3534 /// \endcode
3535 static llvm::Function *
3536 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3537                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3538                       QualType KmpTaskTWithPrivatesPtrQTy,
3539                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3540                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3541                       llvm::Value *TaskPrivatesMap) {
3542   ASTContext &C = CGM.getContext();
3543   FunctionArgList Args;
3544   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3545                             ImplicitParamDecl::Other);
3546   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3547                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3548                                 ImplicitParamDecl::Other);
3549   Args.push_back(&GtidArg);
3550   Args.push_back(&TaskTypeArg);
3551   const auto &TaskEntryFnInfo =
3552       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3553   llvm::FunctionType *TaskEntryTy =
3554       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3555   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3556   auto *TaskEntry = llvm::Function::Create(
3557       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3558   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3559   TaskEntry->setDoesNotRecurse();
3560   CodeGenFunction CGF(CGM);
3561   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3562                     Loc, Loc);
3563 
3564   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3565   // tt,
3566   // For taskloops:
3567   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3568   // tt->task_data.shareds);
3569   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3570       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3571   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3572       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3573       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3574   const auto *KmpTaskTWithPrivatesQTyRD =
3575       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3576   LValue Base =
3577       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3578   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3579   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3580   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3581   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3582 
3583   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3584   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3585   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3586       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3587       CGF.ConvertTypeForMem(SharedsPtrTy));
3588 
3589   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3590   llvm::Value *PrivatesParam;
3591   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3592     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3593     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3594         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3595   } else {
3596     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3597   }
3598 
3599   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3600                                TaskPrivatesMap,
3601                                CGF.Builder
3602                                    .CreatePointerBitCastOrAddrSpaceCast(
3603                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3604                                    .getPointer()};
3605   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3606                                           std::end(CommonArgs));
3607   if (isOpenMPTaskLoopDirective(Kind)) {
3608     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3609     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3610     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3611     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3612     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3613     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3614     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3615     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3616     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3617     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3618     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3619     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3620     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3621     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3622     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3623     CallArgs.push_back(LBParam);
3624     CallArgs.push_back(UBParam);
3625     CallArgs.push_back(StParam);
3626     CallArgs.push_back(LIParam);
3627     CallArgs.push_back(RParam);
3628   }
3629   CallArgs.push_back(SharedsParam);
3630 
3631   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3632                                                   CallArgs);
3633   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3634                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3635   CGF.FinishFunction();
3636   return TaskEntry;
3637 }
3638 
3639 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3640                                             SourceLocation Loc,
3641                                             QualType KmpInt32Ty,
3642                                             QualType KmpTaskTWithPrivatesPtrQTy,
3643                                             QualType KmpTaskTWithPrivatesQTy) {
3644   ASTContext &C = CGM.getContext();
3645   FunctionArgList Args;
3646   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3647                             ImplicitParamDecl::Other);
3648   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3649                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3650                                 ImplicitParamDecl::Other);
3651   Args.push_back(&GtidArg);
3652   Args.push_back(&TaskTypeArg);
3653   const auto &DestructorFnInfo =
3654       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3655   llvm::FunctionType *DestructorFnTy =
3656       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3657   std::string Name =
3658       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3659   auto *DestructorFn =
3660       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3661                              Name, &CGM.getModule());
3662   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3663                                     DestructorFnInfo);
3664   DestructorFn->setDoesNotRecurse();
3665   CodeGenFunction CGF(CGM);
3666   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3667                     Args, Loc, Loc);
3668 
3669   LValue Base = CGF.EmitLoadOfPointerLValue(
3670       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3671       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3672   const auto *KmpTaskTWithPrivatesQTyRD =
3673       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3674   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3675   Base = CGF.EmitLValueForField(Base, *FI);
3676   for (const auto *Field :
3677        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3678     if (QualType::DestructionKind DtorKind =
3679             Field->getType().isDestructedType()) {
3680       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3681       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3682     }
3683   }
3684   CGF.FinishFunction();
3685   return DestructorFn;
3686 }
3687 
3688 /// Emit a privates mapping function for correct handling of private and
3689 /// firstprivate variables.
3690 /// \code
3691 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3692 /// **noalias priv1,...,  <tyn> **noalias privn) {
3693 ///   *priv1 = &.privates.priv1;
3694 ///   ...;
3695 ///   *privn = &.privates.privn;
3696 /// }
3697 /// \endcode
3698 static llvm::Value *
3699 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3700                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3701                                ArrayRef<PrivateDataTy> Privates) {
3702   ASTContext &C = CGM.getContext();
3703   FunctionArgList Args;
3704   ImplicitParamDecl TaskPrivatesArg(
3705       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3706       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3707       ImplicitParamDecl::Other);
3708   Args.push_back(&TaskPrivatesArg);
3709   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3710   unsigned Counter = 1;
3711   for (const Expr *E : Data.PrivateVars) {
3712     Args.push_back(ImplicitParamDecl::Create(
3713         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3714         C.getPointerType(C.getPointerType(E->getType()))
3715             .withConst()
3716             .withRestrict(),
3717         ImplicitParamDecl::Other));
3718     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3719     PrivateVarsPos[VD] = Counter;
3720     ++Counter;
3721   }
3722   for (const Expr *E : Data.FirstprivateVars) {
3723     Args.push_back(ImplicitParamDecl::Create(
3724         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3725         C.getPointerType(C.getPointerType(E->getType()))
3726             .withConst()
3727             .withRestrict(),
3728         ImplicitParamDecl::Other));
3729     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3730     PrivateVarsPos[VD] = Counter;
3731     ++Counter;
3732   }
3733   for (const Expr *E : Data.LastprivateVars) {
3734     Args.push_back(ImplicitParamDecl::Create(
3735         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3736         C.getPointerType(C.getPointerType(E->getType()))
3737             .withConst()
3738             .withRestrict(),
3739         ImplicitParamDecl::Other));
3740     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3741     PrivateVarsPos[VD] = Counter;
3742     ++Counter;
3743   }
3744   for (const VarDecl *VD : Data.PrivateLocals) {
3745     QualType Ty = VD->getType().getNonReferenceType();
3746     if (VD->getType()->isLValueReferenceType())
3747       Ty = C.getPointerType(Ty);
3748     if (isAllocatableDecl(VD))
3749       Ty = C.getPointerType(Ty);
3750     Args.push_back(ImplicitParamDecl::Create(
3751         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3753         ImplicitParamDecl::Other));
3754     PrivateVarsPos[VD] = Counter;
3755     ++Counter;
3756   }
3757   const auto &TaskPrivatesMapFnInfo =
3758       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3759   llvm::FunctionType *TaskPrivatesMapTy =
3760       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3761   std::string Name =
3762       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3763   auto *TaskPrivatesMap = llvm::Function::Create(
3764       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3765       &CGM.getModule());
3766   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3767                                     TaskPrivatesMapFnInfo);
3768   if (CGM.getLangOpts().Optimize) {
3769     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3770     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3771     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3772   }
3773   CodeGenFunction CGF(CGM);
3774   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3775                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3776 
3777   // *privi = &.privates.privi;
3778   LValue Base = CGF.EmitLoadOfPointerLValue(
3779       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3780       TaskPrivatesArg.getType()->castAs<PointerType>());
3781   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3782   Counter = 0;
3783   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3784     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3785     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3786     LValue RefLVal =
3787         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3788     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3789         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3790     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3791     ++Counter;
3792   }
3793   CGF.FinishFunction();
3794   return TaskPrivatesMap;
3795 }
3796 
3797 /// Emit initialization for private variables in task-based directives.
3798 static void emitPrivatesInit(CodeGenFunction &CGF,
3799                              const OMPExecutableDirective &D,
3800                              Address KmpTaskSharedsPtr, LValue TDBase,
3801                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3802                              QualType SharedsTy, QualType SharedsPtrTy,
3803                              const OMPTaskDataTy &Data,
3804                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3805   ASTContext &C = CGF.getContext();
3806   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3807   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3808   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3809                                  ? OMPD_taskloop
3810                                  : OMPD_task;
3811   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3812   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3813   LValue SrcBase;
3814   bool IsTargetTask =
3815       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3816       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3817   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3818   // PointersArray, SizesArray, and MappersArray. The original variables for
3819   // these arrays are not captured and we get their addresses explicitly.
3820   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3821       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3822     SrcBase = CGF.MakeAddrLValue(
3823         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3824             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3825         SharedsTy);
3826   }
3827   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3828   for (const PrivateDataTy &Pair : Privates) {
3829     // Do not initialize private locals.
3830     if (Pair.second.isLocalPrivate()) {
3831       ++FI;
3832       continue;
3833     }
3834     const VarDecl *VD = Pair.second.PrivateCopy;
3835     const Expr *Init = VD->getAnyInitializer();
3836     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3837                              !CGF.isTrivialInitializer(Init)))) {
3838       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3839       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3840         const VarDecl *OriginalVD = Pair.second.Original;
3841         // Check if the variable is the target-based BasePointersArray,
3842         // PointersArray, SizesArray, or MappersArray.
3843         LValue SharedRefLValue;
3844         QualType Type = PrivateLValue.getType();
3845         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3846         if (IsTargetTask && !SharedField) {
3847           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3848                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3849                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3850                          ->getNumParams() == 0 &&
3851                  isa<TranslationUnitDecl>(
3852                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3853                          ->getDeclContext()) &&
3854                  "Expected artificial target data variable.");
3855           SharedRefLValue =
3856               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3857         } else if (ForDup) {
3858           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3859           SharedRefLValue = CGF.MakeAddrLValue(
3860               Address(SharedRefLValue.getPointer(CGF),
3861                       C.getDeclAlign(OriginalVD)),
3862               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3863               SharedRefLValue.getTBAAInfo());
3864         } else if (CGF.LambdaCaptureFields.count(
3865                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3866                    dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3867           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3868         } else {
3869           // Processing for implicitly captured variables.
3870           InlinedOpenMPRegionRAII Region(
3871               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3872               /*HasCancel=*/false, /*NoInheritance=*/true);
3873           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3874         }
3875         if (Type->isArrayType()) {
3876           // Initialize firstprivate array.
3877           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3878             // Perform simple memcpy.
3879             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3880           } else {
3881             // Initialize firstprivate array using element-by-element
3882             // initialization.
3883             CGF.EmitOMPAggregateAssign(
3884                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3885                 Type,
3886                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3887                                                   Address SrcElement) {
3888                   // Clean up any temporaries needed by the initialization.
3889                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3890                   InitScope.addPrivate(
3891                       Elem, [SrcElement]() -> Address { return SrcElement; });
3892                   (void)InitScope.Privatize();
3893                   // Emit initialization for single element.
3894                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3895                       CGF, &CapturesInfo);
3896                   CGF.EmitAnyExprToMem(Init, DestElement,
3897                                        Init->getType().getQualifiers(),
3898                                        /*IsInitializer=*/false);
3899                 });
3900           }
3901         } else {
3902           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3903           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3904             return SharedRefLValue.getAddress(CGF);
3905           });
3906           (void)InitScope.Privatize();
3907           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3908           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3909                              /*capturedByInit=*/false);
3910         }
3911       } else {
3912         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3913       }
3914     }
3915     ++FI;
3916   }
3917 }
3918 
3919 /// Check if duplication function is required for taskloops.
3920 static bool checkInitIsRequired(CodeGenFunction &CGF,
3921                                 ArrayRef<PrivateDataTy> Privates) {
3922   bool InitRequired = false;
3923   for (const PrivateDataTy &Pair : Privates) {
3924     if (Pair.second.isLocalPrivate())
3925       continue;
3926     const VarDecl *VD = Pair.second.PrivateCopy;
3927     const Expr *Init = VD->getAnyInitializer();
3928     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3929                                     !CGF.isTrivialInitializer(Init));
3930     if (InitRequired)
3931       break;
3932   }
3933   return InitRequired;
3934 }
3935 
3936 
3937 /// Emit task_dup function (for initialization of
3938 /// private/firstprivate/lastprivate vars and last_iter flag)
3939 /// \code
3940 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3941 /// lastpriv) {
3942 /// // setup lastprivate flag
3943 ///    task_dst->last = lastpriv;
3944 /// // could be constructor calls here...
3945 /// }
3946 /// \endcode
3947 static llvm::Value *
3948 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3949                     const OMPExecutableDirective &D,
3950                     QualType KmpTaskTWithPrivatesPtrQTy,
3951                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3952                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3953                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3954                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3955   ASTContext &C = CGM.getContext();
3956   FunctionArgList Args;
3957   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3958                            KmpTaskTWithPrivatesPtrQTy,
3959                            ImplicitParamDecl::Other);
3960   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3961                            KmpTaskTWithPrivatesPtrQTy,
3962                            ImplicitParamDecl::Other);
3963   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3964                                 ImplicitParamDecl::Other);
3965   Args.push_back(&DstArg);
3966   Args.push_back(&SrcArg);
3967   Args.push_back(&LastprivArg);
3968   const auto &TaskDupFnInfo =
3969       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3970   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3971   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3972   auto *TaskDup = llvm::Function::Create(
3973       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3974   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3975   TaskDup->setDoesNotRecurse();
3976   CodeGenFunction CGF(CGM);
3977   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3978                     Loc);
3979 
3980   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3981       CGF.GetAddrOfLocalVar(&DstArg),
3982       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3983   // task_dst->liter = lastpriv;
3984   if (WithLastIter) {
3985     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3986     LValue Base = CGF.EmitLValueForField(
3987         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3988     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3989     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3990         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3991     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3992   }
3993 
3994   // Emit initial values for private copies (if any).
3995   assert(!Privates.empty());
3996   Address KmpTaskSharedsPtr = Address::invalid();
3997   if (!Data.FirstprivateVars.empty()) {
3998     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3999         CGF.GetAddrOfLocalVar(&SrcArg),
4000         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4001     LValue Base = CGF.EmitLValueForField(
4002         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4003     KmpTaskSharedsPtr = Address(
4004         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4005                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4006                                                   KmpTaskTShareds)),
4007                              Loc),
4008         CGM.getNaturalTypeAlignment(SharedsTy));
4009   }
4010   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4011                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4012   CGF.FinishFunction();
4013   return TaskDup;
4014 }
4015 
4016 /// Checks if destructor function is required to be generated.
4017 /// \return true if cleanups are required, false otherwise.
4018 static bool
4019 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4020                          ArrayRef<PrivateDataTy> Privates) {
4021   for (const PrivateDataTy &P : Privates) {
4022     if (P.second.isLocalPrivate())
4023       continue;
4024     QualType Ty = P.second.Original->getType().getNonReferenceType();
4025     if (Ty.isDestructedType())
4026       return true;
4027   }
4028   return false;
4029 }
4030 
4031 namespace {
4032 /// Loop generator for OpenMP iterator expression.
4033 class OMPIteratorGeneratorScope final
4034     : public CodeGenFunction::OMPPrivateScope {
4035   CodeGenFunction &CGF;
4036   const OMPIteratorExpr *E = nullptr;
4037   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4038   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4039   OMPIteratorGeneratorScope() = delete;
4040   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4041 
4042 public:
4043   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4044       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4045     if (!E)
4046       return;
4047     SmallVector<llvm::Value *, 4> Uppers;
4048     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4049       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4050       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4051       addPrivate(VD, [&CGF, VD]() {
4052         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4053       });
4054       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4055       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4056         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4057                                  "counter.addr");
4058       });
4059     }
4060     Privatize();
4061 
4062     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4063       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4064       LValue CLVal =
4065           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4066                              HelperData.CounterVD->getType());
4067       // Counter = 0;
4068       CGF.EmitStoreOfScalar(
4069           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4070           CLVal);
4071       CodeGenFunction::JumpDest &ContDest =
4072           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4073       CodeGenFunction::JumpDest &ExitDest =
4074           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4075       // N = <number-of_iterations>;
4076       llvm::Value *N = Uppers[I];
4077       // cont:
4078       // if (Counter < N) goto body; else goto exit;
4079       CGF.EmitBlock(ContDest.getBlock());
4080       auto *CVal =
4081           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4082       llvm::Value *Cmp =
4083           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4084               ? CGF.Builder.CreateICmpSLT(CVal, N)
4085               : CGF.Builder.CreateICmpULT(CVal, N);
4086       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4087       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4088       // body:
4089       CGF.EmitBlock(BodyBB);
4090       // Iteri = Begini + Counter * Stepi;
4091       CGF.EmitIgnoredExpr(HelperData.Update);
4092     }
4093   }
4094   ~OMPIteratorGeneratorScope() {
4095     if (!E)
4096       return;
4097     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4098       // Counter = Counter + 1;
4099       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4100       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4101       // goto cont;
4102       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4103       // exit:
4104       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4105     }
4106   }
4107 };
4108 } // namespace
4109 
4110 static std::pair<llvm::Value *, llvm::Value *>
4111 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4112   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4113   llvm::Value *Addr;
4114   if (OASE) {
4115     const Expr *Base = OASE->getBase();
4116     Addr = CGF.EmitScalarExpr(Base);
4117   } else {
4118     Addr = CGF.EmitLValue(E).getPointer(CGF);
4119   }
4120   llvm::Value *SizeVal;
4121   QualType Ty = E->getType();
4122   if (OASE) {
4123     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4124     for (const Expr *SE : OASE->getDimensions()) {
4125       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4126       Sz = CGF.EmitScalarConversion(
4127           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4128       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4129     }
4130   } else if (const auto *ASE =
4131                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4132     LValue UpAddrLVal =
4133         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4134     llvm::Value *UpAddr =
4135         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4136     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4137     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4138     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4139   } else {
4140     SizeVal = CGF.getTypeSize(Ty);
4141   }
4142   return std::make_pair(Addr, SizeVal);
4143 }
4144 
4145 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4146 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4147   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4148   if (KmpTaskAffinityInfoTy.isNull()) {
4149     RecordDecl *KmpAffinityInfoRD =
4150         C.buildImplicitRecord("kmp_task_affinity_info_t");
4151     KmpAffinityInfoRD->startDefinition();
4152     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4153     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4154     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4155     KmpAffinityInfoRD->completeDefinition();
4156     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4157   }
4158 }
4159 
/// Emits the allocation and initialization of the runtime task descriptor for
/// the task-generating directive \p D.
///
/// In order, the body: collects the private, firstprivate, lastprivate and
/// local private variables and sorts them by decreasing alignment; builds the
/// kmp_task_t record extended with the privates; emits the proxy task entry;
/// allocates the task with __kmpc_omp_task_alloc (or
/// __kmpc_omp_target_task_alloc when \p D has a 'nowait' clause); handles the
/// 'detach' and 'affinity' clauses; copies the shareds into the task;
/// initializes the privates; and stores the destructors function and the
/// priority into the descriptor when they are needed.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Location used for the emitted runtime calls and loads.
/// \param D Directive being lowered; a taskloop, task or target directive
/// (asserted below).
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used for the privates-map argument.
/// \param SharedsTy Structure type of the shareds.
/// \param Shareds Address the shareds are copied from.
/// \param Data Clause data for the task (privates, tied/final/priority, ...).
/// \return A TaskResultTy with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
/// and KmpTaskTQTyRD filled in; TaskDupFn is set only for taskloop directives
/// that have lastprivates or privates requiring initialization.
4160 CGOpenMPRuntime::TaskResultTy
4161 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4162                               const OMPExecutableDirective &D,
4163                               llvm::Function *TaskFunction, QualType SharedsTy,
4164                               Address Shareds, const OMPTaskDataTy &Data) {
4165   ASTContext &C = CGM.getContext();
4166   llvm::SmallVector<PrivateDataTy, 4> Privates;
4167   // Aggregate privates and sort them by the alignment.
  // Each entry pairs the alignment with the helper data of one private; the
  // *Copies (and *Inits) lists are walked in lockstep with the *Vars lists.
4168   const auto *I = Data.PrivateCopies.begin();
4169   for (const Expr *E : Data.PrivateVars) {
4170     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4171     Privates.emplace_back(
4172         C.getDeclAlign(VD),
4173         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4174                          /*PrivateElemInit=*/nullptr));
4175     ++I;
4176   }
4177   I = Data.FirstprivateCopies.begin();
4178   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4179   for (const Expr *E : Data.FirstprivateVars) {
4180     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4181     Privates.emplace_back(
4182         C.getDeclAlign(VD),
4183         PrivateHelpersTy(
4184             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4185             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4186     ++I;
4187     ++IElemInitRef;
4188   }
4189   I = Data.LastprivateCopies.begin();
4190   for (const Expr *E : Data.LastprivateVars) {
4191     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4192     Privates.emplace_back(
4193         C.getDeclAlign(VD),
4194         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4195                          /*PrivateElemInit=*/nullptr));
4196     ++I;
4197   }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
4198   for (const VarDecl *VD : Data.PrivateLocals) {
4199     if (isAllocatableDecl(VD))
4200       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4201     else
4202       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4203   }
  // Decreasing alignment; the sort is stable, so entries with the same
  // alignment keep the order in which they were added above.
4204   llvm::stable_sort(Privates,
4205                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4206                       return L.first > R.first;
4207                     });
4208   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4209   // Build type kmp_routine_entry_t (if not built yet).
4210   emitKmpRoutineEntryT(KmpInt32Ty);
4211   // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the other directives cache their kmp_task_t
  // record types separately.
4212   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4213     if (SavedKmpTaskloopTQTy.isNull()) {
4214       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4215           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4216     }
4217     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4218   } else {
4219     assert((D.getDirectiveKind() == OMPD_task ||
4220             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4221             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4222            "Expected taskloop, task or target directive");
4223     if (SavedKmpTaskTQTy.isNull()) {
4224       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4225           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4226     }
4227     KmpTaskTQTy = SavedKmpTaskTQTy;
4228   }
4229   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4230   // Build particular struct kmp_task_t for the given task.
4231   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4232       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4233   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4234   QualType KmpTaskTWithPrivatesPtrQTy =
4235       C.getPointerType(KmpTaskTWithPrivatesQTy);
4236   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4237   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4238       KmpTaskTWithPrivatesTy->getPointerTo();
4239   llvm::Value *KmpTaskTWithPrivatesTySize =
4240       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4241   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4242
4243   // Emit initial values for private copies (if any).
  // The privates-map argument has the type of the fourth parameter of
  // TaskFunction; it is a null pointer of that type when there are no
  // privates.
4244   llvm::Value *TaskPrivatesMap = nullptr;
4245   llvm::Type *TaskPrivatesMapTy =
4246       std::next(TaskFunction->arg_begin(), 3)->getType();
4247   if (!Privates.empty()) {
4248     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4249     TaskPrivatesMap =
4250         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4251     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4252         TaskPrivatesMap, TaskPrivatesMapTy);
4253   } else {
4254     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4255         cast<llvm::PointerType>(TaskPrivatesMapTy));
4256   }
4257   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4258   // kmp_task_t *tt);
4259   llvm::Function *TaskEntry = emitProxyTaskFunction(
4260       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4261       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4262       TaskPrivatesMap);
4263
4264   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4265   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4266   // kmp_routine_entry_t *task_entry);
4267   // Task flags. Format is taken from
4268   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4269   // description of kmp_tasking_flags struct.
4270   enum {
4271     TiedFlag = 0x1,
4272     FinalFlag = 0x2,
4273     DestructorsFlag = 0x8,
4274     PriorityFlag = 0x20,
4275     DetachableFlag = 0x40,
4276   };
  // Flags holds the bits known at compile time; the 'final' bit is combined
  // separately below because it may be selected by a runtime value.
4277   unsigned Flags = Data.Tied ? TiedFlag : 0;
4278   bool NeedsCleanup = false;
4279   if (!Privates.empty()) {
4280     NeedsCleanup =
4281         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4282     if (NeedsCleanup)
4283       Flags = Flags | DestructorsFlag;
4284   }
4285   if (Data.Priority.getInt())
4286     Flags = Flags | PriorityFlag;
4287   if (D.hasClausesOfKind<OMPDetachClause>())
4288     Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, pick FinalFlag or 0 with a select on it;
  // otherwise use the constant taken from its integer part.
4289   llvm::Value *TaskFlags =
4290       Data.Final.getPointer()
4291           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4292                                      CGF.Builder.getInt32(FinalFlag),
4293                                      CGF.Builder.getInt32(/*C=*/0))
4294           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4295   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4296   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4297   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4298       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4299       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4300           TaskEntry, KmpRoutineEntryPtrTy)};
4301   llvm::Value *NewTask;
  // With a 'nowait' clause the task is allocated through
  // __kmpc_omp_target_task_alloc, which receives the device id as an extra
  // trailing argument.
4302   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4303     // Check if we have any device clause associated with the directive.
4304     const Expr *Device = nullptr;
4305     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4306       Device = C->getDevice();
4307     // Emit device ID if any otherwise use default value.
4308     llvm::Value *DeviceID;
4309     if (Device)
4310       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4311                                            CGF.Int64Ty, /*isSigned=*/true);
4312     else
4313       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4314     AllocArgs.push_back(DeviceID);
4315     NewTask = CGF.EmitRuntimeCall(
4316         OMPBuilder.getOrCreateRuntimeFunction(
4317             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4318         AllocArgs);
4319   } else {
4320     NewTask =
4321         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4322                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4323                             AllocArgs);
4324   }
4325   // Emit detach clause initialization.
4326   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4327   // task_descriptor);
4328   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4329     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4330     LValue EvtLVal = CGF.EmitLValue(Evt);
4331
4332     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4333     // int gtid, kmp_task_t *task);
4334     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4335     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4336     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4337     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4338         OMPBuilder.getOrCreateRuntimeFunction(
4339             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4340         {Loc, Tid, NewTask});
4341     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4342                                       Evt->getExprLoc());
4343     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4344   }
4345   // Process affinity clauses.
4346   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4347     // Process list of affinity data.
4348     ASTContext &C = CGM.getContext();
4349     Address AffinitiesArray = Address::invalid();
4350     // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the list items of clauses without an iterator
    // modifier (a compile-time constant); NumOfElements is the product of the
    // upper bounds of all iterators of all iterator-modified clauses (a
    // runtime value, null when no clause has an iterator).
    // NOTE(review): the product does not factor in C->varlist_size() for
    // iterator-modified clauses and is accumulated across clauses, while the
    // fill loop further down stores one entry per list item per iteration --
    // confirm the array is large enough when such a clause has several list
    // items or when several iterator clauses are present.
4351     llvm::Value *NumOfElements = nullptr;
4352     unsigned NumAffinities = 0;
4353     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4354       if (const Expr *Modifier = C->getModifier()) {
4355         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4356         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4357           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4358           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4359           NumOfElements =
4360               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4361         }
4362       } else {
4363         NumAffinities += C->varlist_size();
4364       }
4365     }
4366     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4367     // Fields ids in kmp_task_affinity_info record.
    // Only BaseAddr and Len are stored by the code below; no store to the
    // Flags field is emitted in this function.
4368     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4369
4370     QualType KmpTaskAffinityInfoArrayTy;
    // A runtime element count requires a variable-length array; otherwise a
    // constant-size temporary is enough.
4371     if (NumOfElements) {
4372       NumOfElements = CGF.Builder.CreateNUWAdd(
4373           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4374       OpaqueValueExpr OVE(
4375           Loc,
4376           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4377           VK_RValue);
4378       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4379                                                     RValue::get(NumOfElements));
4380       KmpTaskAffinityInfoArrayTy =
4381           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4382                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4383       // Properly emit variable-sized array.
4384       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4385                                            ImplicitParamDecl::Other);
4386       CGF.EmitVarDecl(*PD);
4387       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4388       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4389                                                 /*isSigned=*/false);
4390     } else {
4391       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4392           KmpTaskAffinityInfoTy,
4393           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4394           ArrayType::Normal, /*IndexTypeQuals=*/0);
4395       AffinitiesArray =
4396           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4397       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4398       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4399                                              /*isSigned=*/false);
4400     }
4401
4402     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4403     // Fill array by elements without iterators.
    // Pos is the compile-time index of the next free array slot.
4404     unsigned Pos = 0;
4405     bool HasIterator = false;
4406     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4407       if (C->getModifier()) {
4408         HasIterator = true;
4409         continue;
4410       }
4411       for (const Expr *E : C->varlists()) {
4412         llvm::Value *Addr;
4413         llvm::Value *Size;
4414         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4415         LValue Base =
4416             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4417                                KmpTaskAffinityInfoTy);
4418         // affs[i].base_addr = &<Affinities[i].second>;
4419         LValue BaseAddrLVal = CGF.EmitLValueForField(
4420             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4421         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4422                               BaseAddrLVal);
4423         // affs[i].len = sizeof(<Affinities[i].second>);
4424         LValue LenLVal = CGF.EmitLValueForField(
4425             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4426         CGF.EmitStoreOfScalar(Size, LenLVal);
4427         ++Pos;
4428       }
4429     }
    // Entries produced inside iterator loops need a runtime position counter;
    // it starts right after the statically filled entries.
4430     LValue PosLVal;
4431     if (HasIterator) {
4432       PosLVal = CGF.MakeAddrLValue(
4433           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4434           C.getSizeType());
4435       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4436     }
4437     // Process elements with iterators.
4438     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4439       const Expr *Modifier = C->getModifier();
4440       if (!Modifier)
4441         continue;
      // The scope object opens the iterator loops here and closes them when
      // it goes out of scope at the end of this loop iteration.
4442       OMPIteratorGeneratorScope IteratorScope(
4443           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4444       for (const Expr *E : C->varlists()) {
4445         llvm::Value *Addr;
4446         llvm::Value *Size;
4447         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4448         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4449         LValue Base = CGF.MakeAddrLValue(
4450             Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4451                     AffinitiesArray.getAlignment()),
4452             KmpTaskAffinityInfoTy);
4453         // affs[i].base_addr = &<Affinities[i].second>;
4454         LValue BaseAddrLVal = CGF.EmitLValueForField(
4455             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4456         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4457                               BaseAddrLVal);
4458         // affs[i].len = sizeof(<Affinities[i].second>);
4459         LValue LenLVal = CGF.EmitLValueForField(
4460             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4461         CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position counter.
4462         Idx = CGF.Builder.CreateNUWAdd(
4463             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4464         CGF.EmitStoreOfScalar(Idx, PosLVal);
4465       }
4466     }
4467     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4468     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4469     // naffins, kmp_task_affinity_info_t *affin_list);
4470     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4471     llvm::Value *GTid = getThreadID(CGF, Loc);
4472     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4473         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4474     // FIXME: Emit the function and ignore its result for now unless the
4475     // runtime function is properly implemented.
4476     (void)CGF.EmitRuntimeCall(
4477         OMPBuilder.getOrCreateRuntimeFunction(
4478             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4479         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4480   }
  // View the allocated task as the kmp_task_t-with-privates record; TDBase is
  // the lvalue of its first field.
4481   llvm::Value *NewTaskNewTaskTTy =
4482       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4483           NewTask, KmpTaskTWithPrivatesPtrTy);
4484   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4485                                                KmpTaskTWithPrivatesQTy);
4486   LValue TDBase =
4487       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4488   // Fill the data in the resulting kmp_task_t record.
4489   // Copy shareds if there are any.
  // KmpTaskSharedsPtr stays invalid when the shareds record has no fields.
4490   Address KmpTaskSharedsPtr = Address::invalid();
4491   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4492     KmpTaskSharedsPtr =
4493         Address(CGF.EmitLoadOfScalar(
4494                     CGF.EmitLValueForField(
4495                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4496                                            KmpTaskTShareds)),
4497                     Loc),
4498                 CGM.getNaturalTypeAlignment(SharedsTy));
4499     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4500     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4501     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4502   }
4503   // Emit initial values for private copies (if any).
4504   TaskResultTy Result;
4505   if (!Privates.empty()) {
4506     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4507                      SharedsTy, SharedsPtrTy, Data, Privates,
4508                      /*ForDup=*/false);
    // A task duplication function is generated only for taskloop directives
    // that have lastprivates or privates requiring initialization.
4509     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4510         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4511       Result.TaskDupFn = emitTaskDupFunction(
4512           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4513           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4514           /*WithLastIter=*/!Data.LastprivateVars.empty());
4515     }
4516   }
4517   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4518   enum { Priority = 0, Destructors = 1 };
4519   // Provide pointer to function with destructors for privates.
  // The union declaration is taken from the type of the Data1 field and is
  // reused below to address the priority member inside the Data2 field.
4520   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4521   const RecordDecl *KmpCmplrdataUD =
4522       (*FI)->getType()->getAsUnionType()->getDecl();
4523   if (NeedsCleanup) {
4524     llvm::Value *DestructorFn = emitDestructorsFunction(
4525         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4526         KmpTaskTWithPrivatesQTy);
4527     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4528     LValue DestructorsLV = CGF.EmitLValueForField(
4529         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4530     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4531                               DestructorFn, KmpRoutineEntryPtrTy),
4532                           DestructorsLV);
4533   }
4534   // Set priority.
4535   if (Data.Priority.getInt()) {
4536     LValue Data2LV = CGF.EmitLValueForField(
4537         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4538     LValue PriorityLV = CGF.EmitLValueForField(
4539         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4540     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4541   }
4542   Result.NewTask = NewTask;
4543   Result.TaskEntry = TaskEntry;
4544   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4545   Result.TDBase = TDBase;
4546   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4547   return Result;
4548 }
4549 
4550 namespace {
4551 /// Dependence kind values for the RTL; translateDependencyKind() maps the
/// 'depend' clause kinds onto them and emitDependData() stores them into the
/// flags field of a kmp_depend_info element.
4552 enum RTLDependenceKindTy {
4553   DepIn = 0x01,
4554   DepInOut = 0x3,
4555   DepMutexInOutSet = 0x4
4556 };
4557 /// Field indices in the kmp_depend_info record, in the order in which
/// getDependTypes() adds the fields to the record.
4558 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4559 } // namespace
4560 
4561 /// Translates internal dependency kind into the runtime kind.
4562 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4563   RTLDependenceKindTy DepKind;
4564   switch (K) {
4565   case OMPC_DEPEND_in:
4566     DepKind = DepIn;
4567     break;
4568   // Out and InOut dependencies must use the same code.
4569   case OMPC_DEPEND_out:
4570   case OMPC_DEPEND_inout:
4571     DepKind = DepInOut;
4572     break;
4573   case OMPC_DEPEND_mutexinoutset:
4574     DepKind = DepMutexInOutSet;
4575     break;
4576   case OMPC_DEPEND_source:
4577   case OMPC_DEPEND_sink:
4578   case OMPC_DEPEND_depobj:
4579   case OMPC_DEPEND_unknown:
4580     llvm_unreachable("Unknown task dependence type");
4581   }
4582   return DepKind;
4583 }
4584 
4585 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4586 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4587                            QualType &FlagsTy) {
4588   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4589   if (KmpDependInfoTy.isNull()) {
4590     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4591     KmpDependInfoRD->startDefinition();
4592     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4593     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4594     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4595     KmpDependInfoRD->completeDefinition();
4596     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4597   }
4598 }
4599 
4600 std::pair<llvm::Value *, LValue>
4601 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4602                                    SourceLocation Loc) {
4603   ASTContext &C = CGM.getContext();
4604   QualType FlagsTy;
4605   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4606   RecordDecl *KmpDependInfoRD =
4607       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4608   LValue Base = CGF.EmitLoadOfPointerLValue(
4609       DepobjLVal.getAddress(CGF),
4610       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4611   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4612   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4613           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4614   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4615                             Base.getTBAAInfo());
4616   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4617       Addr.getPointer(),
4618       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4619   LValue NumDepsBase = CGF.MakeAddrLValue(
4620       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4621       Base.getBaseInfo(), Base.getTBAAInfo());
4622   // NumDeps = deps[i].base_addr;
4623   LValue BaseAddrLVal = CGF.EmitLValueForField(
4624       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4625   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4626   return std::make_pair(NumDeps, Base);
4627 }
4628 
4629 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4630                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4631                            const OMPTaskDataTy::DependData &Data,
4632                            Address DependenciesArray) {
4633   CodeGenModule &CGM = CGF.CGM;
4634   ASTContext &C = CGM.getContext();
4635   QualType FlagsTy;
4636   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4637   RecordDecl *KmpDependInfoRD =
4638       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4639   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4640 
4641   OMPIteratorGeneratorScope IteratorScope(
4642       CGF, cast_or_null<OMPIteratorExpr>(
4643                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4644                                  : nullptr));
4645   for (const Expr *E : Data.DepExprs) {
4646     llvm::Value *Addr;
4647     llvm::Value *Size;
4648     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4649     LValue Base;
4650     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4651       Base = CGF.MakeAddrLValue(
4652           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4653     } else {
4654       LValue &PosLVal = *Pos.get<LValue *>();
4655       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4656       Base = CGF.MakeAddrLValue(
4657           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4658                   DependenciesArray.getAlignment()),
4659           KmpDependInfoTy);
4660     }
4661     // deps[i].base_addr = &<Dependencies[i].second>;
4662     LValue BaseAddrLVal = CGF.EmitLValueForField(
4663         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4664     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4665                           BaseAddrLVal);
4666     // deps[i].len = sizeof(<Dependencies[i].second>);
4667     LValue LenLVal = CGF.EmitLValueForField(
4668         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4669     CGF.EmitStoreOfScalar(Size, LenLVal);
4670     // deps[i].flags = <Dependencies[i].first>;
4671     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4672     LValue FlagsLVal = CGF.EmitLValueForField(
4673         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4674     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4675                           FlagsLVal);
4676     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4677       ++(*P);
4678     } else {
4679       LValue &PosLVal = *Pos.get<LValue *>();
4680       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4681       Idx = CGF.Builder.CreateNUWAdd(Idx,
4682                                      llvm::ConstantInt::get(Idx->getType(), 1));
4683       CGF.EmitStoreOfScalar(Idx, PosLVal);
4684     }
4685   }
4686 }
4687 
4688 static SmallVector<llvm::Value *, 4>
4689 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4690                         const OMPTaskDataTy::DependData &Data) {
4691   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4692          "Expected depobj dependecy kind.");
4693   SmallVector<llvm::Value *, 4> Sizes;
4694   SmallVector<LValue, 4> SizeLVals;
4695   ASTContext &C = CGF.getContext();
4696   QualType FlagsTy;
4697   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4698   RecordDecl *KmpDependInfoRD =
4699       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4700   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4701   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4702   {
4703     OMPIteratorGeneratorScope IteratorScope(
4704         CGF, cast_or_null<OMPIteratorExpr>(
4705                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4706                                    : nullptr));
4707     for (const Expr *E : Data.DepExprs) {
4708       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4709       LValue Base = CGF.EmitLoadOfPointerLValue(
4710           DepobjLVal.getAddress(CGF),
4711           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4712       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4713           Base.getAddress(CGF), KmpDependInfoPtrT);
4714       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4715                                 Base.getTBAAInfo());
4716       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4717           Addr.getPointer(),
4718           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4719       LValue NumDepsBase = CGF.MakeAddrLValue(
4720           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4721           Base.getBaseInfo(), Base.getTBAAInfo());
4722       // NumDeps = deps[i].base_addr;
4723       LValue BaseAddrLVal = CGF.EmitLValueForField(
4724           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4725       llvm::Value *NumDeps =
4726           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4727       LValue NumLVal = CGF.MakeAddrLValue(
4728           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4729           C.getUIntPtrType());
4730       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4731                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4732       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4733       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4734       CGF.EmitStoreOfScalar(Add, NumLVal);
4735       SizeLVals.push_back(NumLVal);
4736     }
4737   }
4738   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4739     llvm::Value *Size =
4740         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4741     Sizes.push_back(Size);
4742   }
4743   return Sizes;
4744 }
4745 
4746 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4747                                LValue PosLVal,
4748                                const OMPTaskDataTy::DependData &Data,
4749                                Address DependenciesArray) {
4750   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4751          "Expected depobj dependecy kind.");
4752   ASTContext &C = CGF.getContext();
4753   QualType FlagsTy;
4754   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4755   RecordDecl *KmpDependInfoRD =
4756       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4757   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4758   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4759   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4760   {
4761     OMPIteratorGeneratorScope IteratorScope(
4762         CGF, cast_or_null<OMPIteratorExpr>(
4763                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4764                                    : nullptr));
4765     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4766       const Expr *E = Data.DepExprs[I];
4767       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4768       LValue Base = CGF.EmitLoadOfPointerLValue(
4769           DepobjLVal.getAddress(CGF),
4770           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4771       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4772           Base.getAddress(CGF), KmpDependInfoPtrT);
4773       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4774                                 Base.getTBAAInfo());
4775 
4776       // Get number of elements in a single depobj.
4777       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4778           Addr.getPointer(),
4779           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4780       LValue NumDepsBase = CGF.MakeAddrLValue(
4781           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4782           Base.getBaseInfo(), Base.getTBAAInfo());
4783       // NumDeps = deps[i].base_addr;
4784       LValue BaseAddrLVal = CGF.EmitLValueForField(
4785           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4786       llvm::Value *NumDeps =
4787           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4788 
4789       // memcopy dependency data.
4790       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4791           ElSize,
4792           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4793       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4794       Address DepAddr =
4795           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4796                   DependenciesArray.getAlignment());
4797       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4798 
4799       // Increase pos.
4800       // pos += size;
4801       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4802       CGF.EmitStoreOfScalar(Add, PosLVal);
4803     }
4804   }
4805 }
4806 
4807 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4808     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4809     SourceLocation Loc) {
4810   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4811         return D.DepExprs.empty();
4812       }))
4813     return std::make_pair(nullptr, Address::invalid());
4814   // Process list of dependencies.
4815   ASTContext &C = CGM.getContext();
4816   Address DependenciesArray = Address::invalid();
4817   llvm::Value *NumOfElements = nullptr;
4818   unsigned NumDependencies = std::accumulate(
4819       Dependencies.begin(), Dependencies.end(), 0,
4820       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4821         return D.DepKind == OMPC_DEPEND_depobj
4822                    ? V
4823                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4824       });
4825   QualType FlagsTy;
4826   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4827   bool HasDepobjDeps = false;
4828   bool HasRegularWithIterators = false;
4829   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4830   llvm::Value *NumOfRegularWithIterators =
4831       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4832   // Calculate number of depobj dependecies and regular deps with the iterators.
4833   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4834     if (D.DepKind == OMPC_DEPEND_depobj) {
4835       SmallVector<llvm::Value *, 4> Sizes =
4836           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4837       for (llvm::Value *Size : Sizes) {
4838         NumOfDepobjElements =
4839             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4840       }
4841       HasDepobjDeps = true;
4842       continue;
4843     }
4844     // Include number of iterations, if any.
4845     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4846       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4847         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4848         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4849         NumOfRegularWithIterators =
4850             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4851       }
4852       HasRegularWithIterators = true;
4853       continue;
4854     }
4855   }
4856 
4857   QualType KmpDependInfoArrayTy;
4858   if (HasDepobjDeps || HasRegularWithIterators) {
4859     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4860                                            /*isSigned=*/false);
4861     if (HasDepobjDeps) {
4862       NumOfElements =
4863           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4864     }
4865     if (HasRegularWithIterators) {
4866       NumOfElements =
4867           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4868     }
4869     OpaqueValueExpr OVE(Loc,
4870                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4871                         VK_RValue);
4872     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4873                                                   RValue::get(NumOfElements));
4874     KmpDependInfoArrayTy =
4875         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4876                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4877     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4878     // Properly emit variable-sized array.
4879     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4880                                          ImplicitParamDecl::Other);
4881     CGF.EmitVarDecl(*PD);
4882     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4883     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4884                                               /*isSigned=*/false);
4885   } else {
4886     KmpDependInfoArrayTy = C.getConstantArrayType(
4887         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4888         ArrayType::Normal, /*IndexTypeQuals=*/0);
4889     DependenciesArray =
4890         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4891     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4892     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4893                                            /*isSigned=*/false);
4894   }
4895   unsigned Pos = 0;
4896   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4897     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4898         Dependencies[I].IteratorExpr)
4899       continue;
4900     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4901                    DependenciesArray);
4902   }
4903   // Copy regular dependecies with iterators.
4904   LValue PosLVal = CGF.MakeAddrLValue(
4905       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4906   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4907   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4908     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4909         !Dependencies[I].IteratorExpr)
4910       continue;
4911     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4912                    DependenciesArray);
4913   }
4914   // Copy final depobj arrays without iterators.
4915   if (HasDepobjDeps) {
4916     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4917       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4918         continue;
4919       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4920                          DependenciesArray);
4921     }
4922   }
4923   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4924       DependenciesArray, CGF.VoidPtrTy);
4925   return std::make_pair(NumOfElements, DependenciesArray);
4926 }
4927 
4928 Address CGOpenMPRuntime::emitDepobjDependClause(
4929     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4930     SourceLocation Loc) {
4931   if (Dependencies.DepExprs.empty())
4932     return Address::invalid();
4933   // Process list of dependencies.
4934   ASTContext &C = CGM.getContext();
4935   Address DependenciesArray = Address::invalid();
4936   unsigned NumDependencies = Dependencies.DepExprs.size();
4937   QualType FlagsTy;
4938   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4939   RecordDecl *KmpDependInfoRD =
4940       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4941 
4942   llvm::Value *Size;
4943   // Define type kmp_depend_info[<Dependencies.size()>];
4944   // For depobj reserve one extra element to store the number of elements.
4945   // It is required to handle depobj(x) update(in) construct.
4946   // kmp_depend_info[<Dependencies.size()>] deps;
4947   llvm::Value *NumDepsVal;
4948   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4949   if (const auto *IE =
4950           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4951     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4952     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4953       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4954       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4955       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4956     }
4957     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4958                                     NumDepsVal);
4959     CharUnits SizeInBytes =
4960         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4961     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4962     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4963     NumDepsVal =
4964         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4965   } else {
4966     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4967         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4968         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4969     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4970     Size = CGM.getSize(Sz.alignTo(Align));
4971     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4972   }
4973   // Need to allocate on the dynamic memory.
4974   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4975   // Use default allocator.
4976   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4977   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4978 
4979   llvm::Value *Addr =
4980       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4981                               CGM.getModule(), OMPRTL___kmpc_alloc),
4982                           Args, ".dep.arr.addr");
4983   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4984       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4985   DependenciesArray = Address(Addr, Align);
4986   // Write number of elements in the first element of array for depobj.
4987   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4988   // deps[i].base_addr = NumDependencies;
4989   LValue BaseAddrLVal = CGF.EmitLValueForField(
4990       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4991   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4992   llvm::PointerUnion<unsigned *, LValue *> Pos;
4993   unsigned Idx = 1;
4994   LValue PosLVal;
4995   if (Dependencies.IteratorExpr) {
4996     PosLVal = CGF.MakeAddrLValue(
4997         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4998         C.getSizeType());
4999     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5000                           /*IsInit=*/true);
5001     Pos = &PosLVal;
5002   } else {
5003     Pos = &Idx;
5004   }
5005   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5006   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5007       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5008   return DependenciesArray;
5009 }
5010 
5011 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5012                                         SourceLocation Loc) {
5013   ASTContext &C = CGM.getContext();
5014   QualType FlagsTy;
5015   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5016   LValue Base = CGF.EmitLoadOfPointerLValue(
5017       DepobjLVal.getAddress(CGF),
5018       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5019   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5020   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5021       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5022   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5023       Addr.getPointer(),
5024       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5025   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5026                                                                CGF.VoidPtrTy);
5027   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5028   // Use default allocator.
5029   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5030   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5031 
5032   // _kmpc_free(gtid, addr, nullptr);
5033   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5034                                 CGM.getModule(), OMPRTL___kmpc_free),
5035                             Args);
5036 }
5037 
5038 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5039                                        OpenMPDependClauseKind NewDepKind,
5040                                        SourceLocation Loc) {
5041   ASTContext &C = CGM.getContext();
5042   QualType FlagsTy;
5043   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5044   RecordDecl *KmpDependInfoRD =
5045       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5046   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5047   llvm::Value *NumDeps;
5048   LValue Base;
5049   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5050 
5051   Address Begin = Base.getAddress(CGF);
5052   // Cast from pointer to array type to pointer to single element.
5053   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5054   // The basic structure here is a while-do loop.
5055   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5056   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5057   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5058   CGF.EmitBlock(BodyBB);
5059   llvm::PHINode *ElementPHI =
5060       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5061   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5062   Begin = Address(ElementPHI, Begin.getAlignment());
5063   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5064                             Base.getTBAAInfo());
5065   // deps[i].flags = NewDepKind;
5066   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5067   LValue FlagsLVal = CGF.EmitLValueForField(
5068       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5069   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5070                         FlagsLVal);
5071 
5072   // Shift the address forward by one element.
5073   Address ElementNext =
5074       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5075   ElementPHI->addIncoming(ElementNext.getPointer(),
5076                           CGF.Builder.GetInsertBlock());
5077   llvm::Value *IsEmpty =
5078       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5079   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5080   // Done.
5081   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5082 }
5083 
5084 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5085                                    const OMPExecutableDirective &D,
5086                                    llvm::Function *TaskFunction,
5087                                    QualType SharedsTy, Address Shareds,
5088                                    const Expr *IfCond,
5089                                    const OMPTaskDataTy &Data) {
5090   if (!CGF.HaveInsertPoint())
5091     return;
5092 
5093   TaskResultTy Result =
5094       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5095   llvm::Value *NewTask = Result.NewTask;
5096   llvm::Function *TaskEntry = Result.TaskEntry;
5097   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5098   LValue TDBase = Result.TDBase;
5099   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5100   // Process list of dependences.
5101   Address DependenciesArray = Address::invalid();
5102   llvm::Value *NumOfElements;
5103   std::tie(NumOfElements, DependenciesArray) =
5104       emitDependClause(CGF, Data.Dependences, Loc);
5105 
5106   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5107   // libcall.
5108   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5109   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5110   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5111   // list is not empty
5112   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5113   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5114   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5115   llvm::Value *DepTaskArgs[7];
5116   if (!Data.Dependences.empty()) {
5117     DepTaskArgs[0] = UpLoc;
5118     DepTaskArgs[1] = ThreadID;
5119     DepTaskArgs[2] = NewTask;
5120     DepTaskArgs[3] = NumOfElements;
5121     DepTaskArgs[4] = DependenciesArray.getPointer();
5122     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5123     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5124   }
5125   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5126                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5127     if (!Data.Tied) {
5128       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5129       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5130       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5131     }
5132     if (!Data.Dependences.empty()) {
5133       CGF.EmitRuntimeCall(
5134           OMPBuilder.getOrCreateRuntimeFunction(
5135               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5136           DepTaskArgs);
5137     } else {
5138       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5139                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5140                           TaskArgs);
5141     }
5142     // Check if parent region is untied and build return for untied task;
5143     if (auto *Region =
5144             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5145       Region->emitUntiedSwitch(CGF);
5146   };
5147 
5148   llvm::Value *DepWaitTaskArgs[6];
5149   if (!Data.Dependences.empty()) {
5150     DepWaitTaskArgs[0] = UpLoc;
5151     DepWaitTaskArgs[1] = ThreadID;
5152     DepWaitTaskArgs[2] = NumOfElements;
5153     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5154     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5155     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5156   }
5157   auto &M = CGM.getModule();
5158   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5159                         TaskEntry, &Data, &DepWaitTaskArgs,
5160                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5161     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5162     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5163     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5164     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5165     // is specified.
5166     if (!Data.Dependences.empty())
5167       CGF.EmitRuntimeCall(
5168           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5169           DepWaitTaskArgs);
5170     // Call proxy_task_entry(gtid, new_task);
5171     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5172                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5173       Action.Enter(CGF);
5174       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5175       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5176                                                           OutlinedFnArgs);
5177     };
5178 
5179     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5180     // kmp_task_t *new_task);
5181     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5182     // kmp_task_t *new_task);
5183     RegionCodeGenTy RCG(CodeGen);
5184     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5185                               M, OMPRTL___kmpc_omp_task_begin_if0),
5186                           TaskArgs,
5187                           OMPBuilder.getOrCreateRuntimeFunction(
5188                               M, OMPRTL___kmpc_omp_task_complete_if0),
5189                           TaskArgs);
5190     RCG.setAction(Action);
5191     RCG(CGF);
5192   };
5193 
5194   if (IfCond) {
5195     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5196   } else {
5197     RegionCodeGenTy ThenRCG(ThenCodeGen);
5198     ThenRCG(CGF);
5199   }
5200 }
5201 
5202 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5203                                        const OMPLoopDirective &D,
5204                                        llvm::Function *TaskFunction,
5205                                        QualType SharedsTy, Address Shareds,
5206                                        const Expr *IfCond,
5207                                        const OMPTaskDataTy &Data) {
5208   if (!CGF.HaveInsertPoint())
5209     return;
5210   TaskResultTy Result =
5211       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5212   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5213   // libcall.
5214   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5215   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5216   // sched, kmp_uint64 grainsize, void *task_dup);
5217   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5218   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5219   llvm::Value *IfVal;
5220   if (IfCond) {
5221     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5222                                       /*isSigned=*/true);
5223   } else {
5224     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5225   }
5226 
5227   LValue LBLVal = CGF.EmitLValueForField(
5228       Result.TDBase,
5229       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5230   const auto *LBVar =
5231       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5232   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5233                        LBLVal.getQuals(),
5234                        /*IsInitializer=*/true);
5235   LValue UBLVal = CGF.EmitLValueForField(
5236       Result.TDBase,
5237       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5238   const auto *UBVar =
5239       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5240   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5241                        UBLVal.getQuals(),
5242                        /*IsInitializer=*/true);
5243   LValue StLVal = CGF.EmitLValueForField(
5244       Result.TDBase,
5245       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5246   const auto *StVar =
5247       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5248   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5249                        StLVal.getQuals(),
5250                        /*IsInitializer=*/true);
5251   // Store reductions address.
5252   LValue RedLVal = CGF.EmitLValueForField(
5253       Result.TDBase,
5254       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5255   if (Data.Reductions) {
5256     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5257   } else {
5258     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5259                                CGF.getContext().VoidPtrTy);
5260   }
5261   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5262   llvm::Value *TaskArgs[] = {
5263       UpLoc,
5264       ThreadID,
5265       Result.NewTask,
5266       IfVal,
5267       LBLVal.getPointer(CGF),
5268       UBLVal.getPointer(CGF),
5269       CGF.EmitLoadOfScalar(StLVal, Loc),
5270       llvm::ConstantInt::getSigned(
5271           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5272       llvm::ConstantInt::getSigned(
5273           CGF.IntTy, Data.Schedule.getPointer()
5274                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5275                          : NoSchedule),
5276       Data.Schedule.getPointer()
5277           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5278                                       /*isSigned=*/false)
5279           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5280       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5281                              Result.TaskDupFn, CGF.VoidPtrTy)
5282                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5283   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5284                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5285                       TaskArgs);
5286 }
5287 
5288 /// Emit reduction operation for each element of array (required for
5289 /// array sections) LHS op = RHS.
5290 /// \param Type Type of array.
5291 /// \param LHSVar Variable on the left side of the reduction operation
5292 /// (references element of array in original variable).
5293 /// \param RHSVar Variable on the right side of the reduction operation
5294 /// (references element of array in original variable).
5295 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5296 /// RHSVar.
5297 static void EmitOMPAggregateReduction(
5298     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5299     const VarDecl *RHSVar,
5300     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5301                                   const Expr *, const Expr *)> &RedOpGen,
5302     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5303     const Expr *UpExpr = nullptr) {
5304   // Perform element-by-element initialization.
5305   QualType ElementTy;
5306   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5307   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5308 
5309   // Drill down to the base element type on both arrays.
5310   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5311   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5312 
5313   llvm::Value *RHSBegin = RHSAddr.getPointer();
5314   llvm::Value *LHSBegin = LHSAddr.getPointer();
5315   // Cast from pointer to array type to pointer to single element.
5316   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5317   // The basic structure here is a while-do loop.
5318   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5319   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5320   llvm::Value *IsEmpty =
5321       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5322   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5323 
5324   // Enter the loop body, making that address the current address.
5325   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5326   CGF.EmitBlock(BodyBB);
5327 
5328   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5329 
5330   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5331       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5332   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5333   Address RHSElementCurrent =
5334       Address(RHSElementPHI,
5335               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5336 
5337   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5338       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5339   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5340   Address LHSElementCurrent =
5341       Address(LHSElementPHI,
5342               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5343 
5344   // Emit copy.
5345   CodeGenFunction::OMPPrivateScope Scope(CGF);
5346   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5347   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5348   Scope.Privatize();
5349   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5350   Scope.ForceCleanup();
5351 
5352   // Shift the address forward by one element.
5353   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5354       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5355   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5356       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5357   // Check whether we've reached the end.
5358   llvm::Value *Done =
5359       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5360   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5361   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5362   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5363 
5364   // Done.
5365   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5366 }
5367 
5368 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5369 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5370 /// UDR combiner function.
5371 static void emitReductionCombiner(CodeGenFunction &CGF,
5372                                   const Expr *ReductionOp) {
5373   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5374     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5375       if (const auto *DRE =
5376               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5377         if (const auto *DRD =
5378                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5379           std::pair<llvm::Function *, llvm::Function *> Reduction =
5380               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5381           RValue Func = RValue::get(Reduction.first);
5382           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5383           CGF.EmitIgnoredExpr(ReductionOp);
5384           return;
5385         }
5386   CGF.EmitIgnoredExpr(ReductionOp);
5387 }
5388 
5389 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5390     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5391     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5392     ArrayRef<const Expr *> ReductionOps) {
5393   ASTContext &C = CGM.getContext();
5394 
5395   // void reduction_func(void *LHSArg, void *RHSArg);
5396   FunctionArgList Args;
5397   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5398                            ImplicitParamDecl::Other);
5399   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5400                            ImplicitParamDecl::Other);
5401   Args.push_back(&LHSArg);
5402   Args.push_back(&RHSArg);
5403   const auto &CGFI =
5404       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5405   std::string Name = getName({"omp", "reduction", "reduction_func"});
5406   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5407                                     llvm::GlobalValue::InternalLinkage, Name,
5408                                     &CGM.getModule());
5409   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5410   Fn->setDoesNotRecurse();
5411   CodeGenFunction CGF(CGM);
5412   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5413 
5414   // Dst = (void*[n])(LHSArg);
5415   // Src = (void*[n])(RHSArg);
5416   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5417       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5418       ArgsType), CGF.getPointerAlign());
5419   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5420       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5421       ArgsType), CGF.getPointerAlign());
5422 
5423   //  ...
5424   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5425   //  ...
5426   CodeGenFunction::OMPPrivateScope Scope(CGF);
5427   auto IPriv = Privates.begin();
5428   unsigned Idx = 0;
5429   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5430     const auto *RHSVar =
5431         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5432     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5433       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5434     });
5435     const auto *LHSVar =
5436         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5437     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5438       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5439     });
5440     QualType PrivTy = (*IPriv)->getType();
5441     if (PrivTy->isVariablyModifiedType()) {
5442       // Get array size and emit VLA type.
5443       ++Idx;
5444       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5445       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5446       const VariableArrayType *VLA =
5447           CGF.getContext().getAsVariableArrayType(PrivTy);
5448       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5449       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5450           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5451       CGF.EmitVariablyModifiedType(PrivTy);
5452     }
5453   }
5454   Scope.Privatize();
5455   IPriv = Privates.begin();
5456   auto ILHS = LHSExprs.begin();
5457   auto IRHS = RHSExprs.begin();
5458   for (const Expr *E : ReductionOps) {
5459     if ((*IPriv)->getType()->isArrayType()) {
5460       // Emit reduction for array section.
5461       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5462       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5463       EmitOMPAggregateReduction(
5464           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5465           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5466             emitReductionCombiner(CGF, E);
5467           });
5468     } else {
5469       // Emit reduction for array subscript or single variable.
5470       emitReductionCombiner(CGF, E);
5471     }
5472     ++IPriv;
5473     ++ILHS;
5474     ++IRHS;
5475   }
5476   Scope.ForceCleanup();
5477   CGF.FinishFunction();
5478   return Fn;
5479 }
5480 
5481 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5482                                                   const Expr *ReductionOp,
5483                                                   const Expr *PrivateRef,
5484                                                   const DeclRefExpr *LHS,
5485                                                   const DeclRefExpr *RHS) {
5486   if (PrivateRef->getType()->isArrayType()) {
5487     // Emit reduction for array section.
5488     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5489     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5490     EmitOMPAggregateReduction(
5491         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5492         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5493           emitReductionCombiner(CGF, ReductionOp);
5494         });
5495   } else {
5496     // Emit reduction for array subscript or single variable.
5497     emitReductionCombiner(CGF, ReductionOp);
5498   }
5499 }
5500 
5501 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5502                                     ArrayRef<const Expr *> Privates,
5503                                     ArrayRef<const Expr *> LHSExprs,
5504                                     ArrayRef<const Expr *> RHSExprs,
5505                                     ArrayRef<const Expr *> ReductionOps,
5506                                     ReductionOptionsTy Options) {
5507   if (!CGF.HaveInsertPoint())
5508     return;
5509 
5510   bool WithNowait = Options.WithNowait;
5511   bool SimpleReduction = Options.SimpleReduction;
5512 
5513   // Next code should be emitted for reduction:
5514   //
5515   // static kmp_critical_name lock = { 0 };
5516   //
5517   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5518   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5519   //  ...
5520   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5521   //  *(Type<n>-1*)rhs[<n>-1]);
5522   // }
5523   //
5524   // ...
5525   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5526   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5527   // RedList, reduce_func, &<lock>)) {
5528   // case 1:
5529   //  ...
5530   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5531   //  ...
5532   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5533   // break;
5534   // case 2:
5535   //  ...
5536   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5537   //  ...
5538   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5539   // break;
5540   // default:;
5541   // }
5542   //
5543   // if SimpleReduction is true, only the next code is generated:
5544   //  ...
5545   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5546   //  ...
5547 
5548   ASTContext &C = CGM.getContext();
5549 
5550   if (SimpleReduction) {
5551     CodeGenFunction::RunCleanupsScope Scope(CGF);
5552     auto IPriv = Privates.begin();
5553     auto ILHS = LHSExprs.begin();
5554     auto IRHS = RHSExprs.begin();
5555     for (const Expr *E : ReductionOps) {
5556       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5557                                   cast<DeclRefExpr>(*IRHS));
5558       ++IPriv;
5559       ++ILHS;
5560       ++IRHS;
5561     }
5562     return;
5563   }
5564 
5565   // 1. Build a list of reduction variables.
5566   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5567   auto Size = RHSExprs.size();
5568   for (const Expr *E : Privates) {
5569     if (E->getType()->isVariablyModifiedType())
5570       // Reserve place for array size.
5571       ++Size;
5572   }
5573   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5574   QualType ReductionArrayTy =
5575       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5576                              /*IndexTypeQuals=*/0);
5577   Address ReductionList =
5578       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5579   auto IPriv = Privates.begin();
5580   unsigned Idx = 0;
5581   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5582     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5583     CGF.Builder.CreateStore(
5584         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5585             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5586         Elem);
5587     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5588       // Store array size.
5589       ++Idx;
5590       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5591       llvm::Value *Size = CGF.Builder.CreateIntCast(
5592           CGF.getVLASize(
5593                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5594               .NumElts,
5595           CGF.SizeTy, /*isSigned=*/false);
5596       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5597                               Elem);
5598     }
5599   }
5600 
5601   // 2. Emit reduce_func().
5602   llvm::Function *ReductionFn = emitReductionFunction(
5603       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5604       LHSExprs, RHSExprs, ReductionOps);
5605 
5606   // 3. Create static kmp_critical_name lock = { 0 };
5607   std::string Name = getName({"reduction"});
5608   llvm::Value *Lock = getCriticalRegionLock(Name);
5609 
5610   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5611   // RedList, reduce_func, &<lock>);
5612   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5613   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5614   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5615   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5616       ReductionList.getPointer(), CGF.VoidPtrTy);
5617   llvm::Value *Args[] = {
5618       IdentTLoc,                             // ident_t *<loc>
5619       ThreadId,                              // i32 <gtid>
5620       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5621       ReductionArrayTySize,                  // size_type sizeof(RedList)
5622       RL,                                    // void *RedList
5623       ReductionFn, // void (*) (void *, void *) <reduce_func>
5624       Lock         // kmp_critical_name *&<lock>
5625   };
5626   llvm::Value *Res = CGF.EmitRuntimeCall(
5627       OMPBuilder.getOrCreateRuntimeFunction(
5628           CGM.getModule(),
5629           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5630       Args);
5631 
5632   // 5. Build switch(res)
5633   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5634   llvm::SwitchInst *SwInst =
5635       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5636 
5637   // 6. Build case 1:
5638   //  ...
5639   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5640   //  ...
5641   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5642   // break;
5643   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5644   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5645   CGF.EmitBlock(Case1BB);
5646 
5647   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5648   llvm::Value *EndArgs[] = {
5649       IdentTLoc, // ident_t *<loc>
5650       ThreadId,  // i32 <gtid>
5651       Lock       // kmp_critical_name *&<lock>
5652   };
5653   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5654                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5655     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5656     auto IPriv = Privates.begin();
5657     auto ILHS = LHSExprs.begin();
5658     auto IRHS = RHSExprs.begin();
5659     for (const Expr *E : ReductionOps) {
5660       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5661                                      cast<DeclRefExpr>(*IRHS));
5662       ++IPriv;
5663       ++ILHS;
5664       ++IRHS;
5665     }
5666   };
5667   RegionCodeGenTy RCG(CodeGen);
5668   CommonActionTy Action(
5669       nullptr, llvm::None,
5670       OMPBuilder.getOrCreateRuntimeFunction(
5671           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5672                                       : OMPRTL___kmpc_end_reduce),
5673       EndArgs);
5674   RCG.setAction(Action);
5675   RCG(CGF);
5676 
5677   CGF.EmitBranch(DefaultBB);
5678 
5679   // 7. Build case 2:
5680   //  ...
5681   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5682   //  ...
5683   // break;
5684   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5685   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5686   CGF.EmitBlock(Case2BB);
5687 
5688   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5689                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5690     auto ILHS = LHSExprs.begin();
5691     auto IRHS = RHSExprs.begin();
5692     auto IPriv = Privates.begin();
5693     for (const Expr *E : ReductionOps) {
5694       const Expr *XExpr = nullptr;
5695       const Expr *EExpr = nullptr;
5696       const Expr *UpExpr = nullptr;
5697       BinaryOperatorKind BO = BO_Comma;
5698       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5699         if (BO->getOpcode() == BO_Assign) {
5700           XExpr = BO->getLHS();
5701           UpExpr = BO->getRHS();
5702         }
5703       }
5704       // Try to emit update expression as a simple atomic.
5705       const Expr *RHSExpr = UpExpr;
5706       if (RHSExpr) {
5707         // Analyze RHS part of the whole expression.
5708         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5709                 RHSExpr->IgnoreParenImpCasts())) {
5710           // If this is a conditional operator, analyze its condition for
5711           // min/max reduction operator.
5712           RHSExpr = ACO->getCond();
5713         }
5714         if (const auto *BORHS =
5715                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5716           EExpr = BORHS->getRHS();
5717           BO = BORHS->getOpcode();
5718         }
5719       }
5720       if (XExpr) {
5721         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5722         auto &&AtomicRedGen = [BO, VD,
5723                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5724                                     const Expr *EExpr, const Expr *UpExpr) {
5725           LValue X = CGF.EmitLValue(XExpr);
5726           RValue E;
5727           if (EExpr)
5728             E = CGF.EmitAnyExpr(EExpr);
5729           CGF.EmitOMPAtomicSimpleUpdateExpr(
5730               X, E, BO, /*IsXLHSInRHSPart=*/true,
5731               llvm::AtomicOrdering::Monotonic, Loc,
5732               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5733                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5734                 PrivateScope.addPrivate(
5735                     VD, [&CGF, VD, XRValue, Loc]() {
5736                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5737                       CGF.emitOMPSimpleStore(
5738                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5739                           VD->getType().getNonReferenceType(), Loc);
5740                       return LHSTemp;
5741                     });
5742                 (void)PrivateScope.Privatize();
5743                 return CGF.EmitAnyExpr(UpExpr);
5744               });
5745         };
5746         if ((*IPriv)->getType()->isArrayType()) {
5747           // Emit atomic reduction for array section.
5748           const auto *RHSVar =
5749               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5750           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5751                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5752         } else {
5753           // Emit atomic reduction for array subscript or single variable.
5754           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5755         }
5756       } else {
5757         // Emit as a critical region.
5758         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5759                                            const Expr *, const Expr *) {
5760           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5761           std::string Name = RT.getName({"atomic_reduction"});
5762           RT.emitCriticalRegion(
5763               CGF, Name,
5764               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5765                 Action.Enter(CGF);
5766                 emitReductionCombiner(CGF, E);
5767               },
5768               Loc);
5769         };
5770         if ((*IPriv)->getType()->isArrayType()) {
5771           const auto *LHSVar =
5772               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5773           const auto *RHSVar =
5774               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5775           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5776                                     CritRedGen);
5777         } else {
5778           CritRedGen(CGF, nullptr, nullptr, nullptr);
5779         }
5780       }
5781       ++ILHS;
5782       ++IRHS;
5783       ++IPriv;
5784     }
5785   };
5786   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5787   if (!WithNowait) {
5788     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5789     llvm::Value *EndArgs[] = {
5790         IdentTLoc, // ident_t *<loc>
5791         ThreadId,  // i32 <gtid>
5792         Lock       // kmp_critical_name *&<lock>
5793     };
5794     CommonActionTy Action(nullptr, llvm::None,
5795                           OMPBuilder.getOrCreateRuntimeFunction(
5796                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5797                           EndArgs);
5798     AtomicRCG.setAction(Action);
5799     AtomicRCG(CGF);
5800   } else {
5801     AtomicRCG(CGF);
5802   }
5803 
5804   CGF.EmitBranch(DefaultBB);
5805   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5806 }
5807 
5808 /// Generates unique name for artificial threadprivate variables.
5809 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5810 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5811                                       const Expr *Ref) {
5812   SmallString<256> Buffer;
5813   llvm::raw_svector_ostream Out(Buffer);
5814   const clang::DeclRefExpr *DE;
5815   const VarDecl *D = ::getBaseDecl(Ref, DE);
5816   if (!D)
5817     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5818   D = D->getCanonicalDecl();
5819   std::string Name = CGM.getOpenMPRuntime().getName(
5820       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5821   Out << Prefix << Name << "_"
5822       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5823   return std::string(Out.str());
5824 }
5825 
5826 /// Emits reduction initializer function:
5827 /// \code
5828 /// void @.red_init(void* %arg, void* %orig) {
5829 /// %0 = bitcast void* %arg to <type>*
5830 /// store <type> <init>, <type>* %0
5831 /// ret void
5832 /// }
5833 /// \endcode
5834 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5835                                            SourceLocation Loc,
5836                                            ReductionCodeGen &RCG, unsigned N) {
5837   ASTContext &C = CGM.getContext();
5838   QualType VoidPtrTy = C.VoidPtrTy;
5839   VoidPtrTy.addRestrict();
5840   FunctionArgList Args;
5841   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5842                           ImplicitParamDecl::Other);
5843   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5844                               ImplicitParamDecl::Other);
5845   Args.emplace_back(&Param);
5846   Args.emplace_back(&ParamOrig);
5847   const auto &FnInfo =
5848       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5849   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5850   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5851   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5852                                     Name, &CGM.getModule());
5853   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5854   Fn->setDoesNotRecurse();
5855   CodeGenFunction CGF(CGM);
5856   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5857   Address PrivateAddr = CGF.EmitLoadOfPointer(
5858       CGF.GetAddrOfLocalVar(&Param),
5859       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5860   llvm::Value *Size = nullptr;
5861   // If the size of the reduction item is non-constant, load it from global
5862   // threadprivate variable.
5863   if (RCG.getSizes(N).second) {
5864     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5865         CGF, CGM.getContext().getSizeType(),
5866         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5867     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5868                                 CGM.getContext().getSizeType(), Loc);
5869   }
5870   RCG.emitAggregateType(CGF, N, Size);
5871   LValue OrigLVal;
5872   // If initializer uses initializer from declare reduction construct, emit a
5873   // pointer to the address of the original reduction item (reuired by reduction
5874   // initializer)
5875   if (RCG.usesReductionInitializer(N)) {
5876     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5877     SharedAddr = CGF.EmitLoadOfPointer(
5878         SharedAddr,
5879         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5880     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5881   } else {
5882     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5883         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5884         CGM.getContext().VoidPtrTy);
5885   }
5886   // Emit the initializer:
5887   // %0 = bitcast void* %arg to <type>*
5888   // store <type> <init>, <type>* %0
5889   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5890                          [](CodeGenFunction &) { return false; });
5891   CGF.FinishFunction();
5892   return Fn;
5893 }
5894 
5895 /// Emits reduction combiner function:
5896 /// \code
5897 /// void @.red_comb(void* %arg0, void* %arg1) {
5898 /// %lhs = bitcast void* %arg0 to <type>*
5899 /// %rhs = bitcast void* %arg1 to <type>*
5900 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5901 /// store <type> %2, <type>* %lhs
5902 /// ret void
5903 /// }
5904 /// \endcode
5905 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5906                                            SourceLocation Loc,
5907                                            ReductionCodeGen &RCG, unsigned N,
5908                                            const Expr *ReductionOp,
5909                                            const Expr *LHS, const Expr *RHS,
5910                                            const Expr *PrivateRef) {
5911   ASTContext &C = CGM.getContext();
5912   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5913   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5914   FunctionArgList Args;
5915   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5916                                C.VoidPtrTy, ImplicitParamDecl::Other);
5917   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5918                             ImplicitParamDecl::Other);
5919   Args.emplace_back(&ParamInOut);
5920   Args.emplace_back(&ParamIn);
5921   const auto &FnInfo =
5922       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5923   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5924   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5925   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5926                                     Name, &CGM.getModule());
5927   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5928   Fn->setDoesNotRecurse();
5929   CodeGenFunction CGF(CGM);
5930   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5931   llvm::Value *Size = nullptr;
5932   // If the size of the reduction item is non-constant, load it from global
5933   // threadprivate variable.
5934   if (RCG.getSizes(N).second) {
5935     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5936         CGF, CGM.getContext().getSizeType(),
5937         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5938     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5939                                 CGM.getContext().getSizeType(), Loc);
5940   }
5941   RCG.emitAggregateType(CGF, N, Size);
5942   // Remap lhs and rhs variables to the addresses of the function arguments.
5943   // %lhs = bitcast void* %arg0 to <type>*
5944   // %rhs = bitcast void* %arg1 to <type>*
5945   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5946   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5947     // Pull out the pointer to the variable.
5948     Address PtrAddr = CGF.EmitLoadOfPointer(
5949         CGF.GetAddrOfLocalVar(&ParamInOut),
5950         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5951     return CGF.Builder.CreateElementBitCast(
5952         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5953   });
5954   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5955     // Pull out the pointer to the variable.
5956     Address PtrAddr = CGF.EmitLoadOfPointer(
5957         CGF.GetAddrOfLocalVar(&ParamIn),
5958         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5959     return CGF.Builder.CreateElementBitCast(
5960         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5961   });
5962   PrivateScope.Privatize();
5963   // Emit the combiner body:
5964   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5965   // store <type> %2, <type>* %lhs
5966   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5967       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5968       cast<DeclRefExpr>(RHS));
5969   CGF.FinishFunction();
5970   return Fn;
5971 }
5972 
5973 /// Emits reduction finalizer function:
5974 /// \code
5975 /// void @.red_fini(void* %arg) {
5976 /// %0 = bitcast void* %arg to <type>*
5977 /// <destroy>(<type>* %0)
5978 /// ret void
5979 /// }
5980 /// \endcode
5981 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5982                                            SourceLocation Loc,
5983                                            ReductionCodeGen &RCG, unsigned N) {
5984   if (!RCG.needCleanups(N))
5985     return nullptr;
5986   ASTContext &C = CGM.getContext();
5987   FunctionArgList Args;
5988   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5989                           ImplicitParamDecl::Other);
5990   Args.emplace_back(&Param);
5991   const auto &FnInfo =
5992       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5993   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5994   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5995   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5996                                     Name, &CGM.getModule());
5997   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5998   Fn->setDoesNotRecurse();
5999   CodeGenFunction CGF(CGM);
6000   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6001   Address PrivateAddr = CGF.EmitLoadOfPointer(
6002       CGF.GetAddrOfLocalVar(&Param),
6003       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6004   llvm::Value *Size = nullptr;
6005   // If the size of the reduction item is non-constant, load it from global
6006   // threadprivate variable.
6007   if (RCG.getSizes(N).second) {
6008     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6009         CGF, CGM.getContext().getSizeType(),
6010         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6011     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6012                                 CGM.getContext().getSizeType(), Loc);
6013   }
6014   RCG.emitAggregateType(CGF, N, Size);
6015   // Emit the finalizer body:
6016   // <destroy>(<type>* %0)
6017   RCG.emitCleanups(CGF, N, PrivateAddr);
6018   CGF.FinishFunction(Loc);
6019   return Fn;
6020 }
6021 
6022 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6023     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6024     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6025   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6026     return nullptr;
6027 
6028   // Build typedef struct:
6029   // kmp_taskred_input {
6030   //   void *reduce_shar; // shared reduction item
6031   //   void *reduce_orig; // original reduction item used for initialization
6032   //   size_t reduce_size; // size of data item
6033   //   void *reduce_init; // data initialization routine
6034   //   void *reduce_fini; // data finalization routine
6035   //   void *reduce_comb; // data combiner routine
6036   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6037   // } kmp_taskred_input_t;
6038   ASTContext &C = CGM.getContext();
6039   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6040   RD->startDefinition();
6041   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6042   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6043   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6044   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6045   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6046   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6047   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6048       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6049   RD->completeDefinition();
6050   QualType RDType = C.getRecordType(RD);
6051   unsigned Size = Data.ReductionVars.size();
6052   llvm::APInt ArraySize(/*numBits=*/64, Size);
6053   QualType ArrayRDType = C.getConstantArrayType(
6054       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6055   // kmp_task_red_input_t .rd_input.[Size];
6056   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6057   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6058                        Data.ReductionCopies, Data.ReductionOps);
6059   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6060     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6061     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6062                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6063     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6064         TaskRedInput.getPointer(), Idxs,
6065         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6066         ".rd_input.gep.");
6067     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6068     // ElemLVal.reduce_shar = &Shareds[Cnt];
6069     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6070     RCG.emitSharedOrigLValue(CGF, Cnt);
6071     llvm::Value *CastedShared =
6072         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6073     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6074     // ElemLVal.reduce_orig = &Origs[Cnt];
6075     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6076     llvm::Value *CastedOrig =
6077         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6078     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6079     RCG.emitAggregateType(CGF, Cnt);
6080     llvm::Value *SizeValInChars;
6081     llvm::Value *SizeVal;
6082     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6083     // We use delayed creation/initialization for VLAs and array sections. It is
6084     // required because runtime does not provide the way to pass the sizes of
6085     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6086     // threadprivate global variables are used to store these values and use
6087     // them in the functions.
6088     bool DelayedCreation = !!SizeVal;
6089     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6090                                                /*isSigned=*/false);
6091     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6092     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6093     // ElemLVal.reduce_init = init;
6094     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6095     llvm::Value *InitAddr =
6096         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6097     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6098     // ElemLVal.reduce_fini = fini;
6099     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6100     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6101     llvm::Value *FiniAddr = Fini
6102                                 ? CGF.EmitCastToVoidPtr(Fini)
6103                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6104     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6105     // ElemLVal.reduce_comb = comb;
6106     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6107     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6108         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6109         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6110     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6111     // ElemLVal.flags = 0;
6112     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6113     if (DelayedCreation) {
6114       CGF.EmitStoreOfScalar(
6115           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6116           FlagsLVal);
6117     } else
6118       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6119                                  FlagsLVal.getType());
6120   }
6121   if (Data.IsReductionWithTaskMod) {
6122     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6123     // is_ws, int num, void *data);
6124     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6125     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6126                                                   CGM.IntTy, /*isSigned=*/true);
6127     llvm::Value *Args[] = {
6128         IdentTLoc, GTid,
6129         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6130                                /*isSigned=*/true),
6131         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6132         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6133             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6134     return CGF.EmitRuntimeCall(
6135         OMPBuilder.getOrCreateRuntimeFunction(
6136             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6137         Args);
6138   }
6139   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6140   llvm::Value *Args[] = {
6141       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6142                                 /*isSigned=*/true),
6143       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6144       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6145                                                       CGM.VoidPtrTy)};
6146   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6147                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6148                              Args);
6149 }
6150 
6151 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6152                                             SourceLocation Loc,
6153                                             bool IsWorksharingReduction) {
6154   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6155   // is_ws, int num, void *data);
6156   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6157   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6158                                                 CGM.IntTy, /*isSigned=*/true);
6159   llvm::Value *Args[] = {IdentTLoc, GTid,
6160                          llvm::ConstantInt::get(CGM.IntTy,
6161                                                 IsWorksharingReduction ? 1 : 0,
6162                                                 /*isSigned=*/true)};
6163   (void)CGF.EmitRuntimeCall(
6164       OMPBuilder.getOrCreateRuntimeFunction(
6165           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6166       Args);
6167 }
6168 
6169 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6170                                               SourceLocation Loc,
6171                                               ReductionCodeGen &RCG,
6172                                               unsigned N) {
6173   auto Sizes = RCG.getSizes(N);
6174   // Emit threadprivate global variable if the type is non-constant
6175   // (Sizes.second = nullptr).
6176   if (Sizes.second) {
6177     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6178                                                      /*isSigned=*/false);
6179     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6180         CGF, CGM.getContext().getSizeType(),
6181         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6182     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6183   }
6184 }
6185 
6186 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6187                                               SourceLocation Loc,
6188                                               llvm::Value *ReductionsPtr,
6189                                               LValue SharedLVal) {
6190   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6191   // *d);
6192   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6193                                                    CGM.IntTy,
6194                                                    /*isSigned=*/true),
6195                          ReductionsPtr,
6196                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6197                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6198   return Address(
6199       CGF.EmitRuntimeCall(
6200           OMPBuilder.getOrCreateRuntimeFunction(
6201               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6202           Args),
6203       SharedLVal.getAlignment());
6204 }
6205 
6206 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6207                                        SourceLocation Loc) {
6208   if (!CGF.HaveInsertPoint())
6209     return;
6210 
6211   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6212     OMPBuilder.createTaskwait(CGF.Builder);
6213   } else {
6214     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6215     // global_tid);
6216     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6217     // Ignore return result until untied tasks are supported.
6218     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6219                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6220                         Args);
6221   }
6222 
6223   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6224     Region->emitUntiedSwitch(CGF);
6225 }
6226 
6227 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6228                                            OpenMPDirectiveKind InnerKind,
6229                                            const RegionCodeGenTy &CodeGen,
6230                                            bool HasCancel) {
6231   if (!CGF.HaveInsertPoint())
6232     return;
6233   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6234                                  InnerKind != OMPD_critical &&
6235                                      InnerKind != OMPD_master);
6236   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6237 }
6238 
namespace {
/// Integer codes for the kind of region being cancelled. The value returned
/// by getCancellationKind() is passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls, so the numeric
/// values must not be changed.
enum RTCancelKind {
  // NOTE(review): presumably "no cancel request"; not produced for any
  // directive kind handled in this file - confirm against the runtime.
  CancelNoreq = 0,
  // Used for OMPD_parallel.
  CancelParallel = 1,
  // Used for OMPD_for.
  CancelLoop = 2,
  // Used for OMPD_sections.
  CancelSections = 3,
  // Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6248 
6249 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6250   RTCancelKind CancelKind = CancelNoreq;
6251   if (CancelRegion == OMPD_parallel)
6252     CancelKind = CancelParallel;
6253   else if (CancelRegion == OMPD_for)
6254     CancelKind = CancelLoop;
6255   else if (CancelRegion == OMPD_sections)
6256     CancelKind = CancelSections;
6257   else {
6258     assert(CancelRegion == OMPD_taskgroup);
6259     CancelKind = CancelTaskgroup;
6260   }
6261   return CancelKind;
6262 }
6263 
6264 void CGOpenMPRuntime::emitCancellationPointCall(
6265     CodeGenFunction &CGF, SourceLocation Loc,
6266     OpenMPDirectiveKind CancelRegion) {
6267   if (!CGF.HaveInsertPoint())
6268     return;
6269   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6270   // global_tid, kmp_int32 cncl_kind);
6271   if (auto *OMPRegionInfo =
6272           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6273     // For 'cancellation point taskgroup', the task region info may not have a
6274     // cancel. This may instead happen in another adjacent task.
6275     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6276       llvm::Value *Args[] = {
6277           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6278           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6279       // Ignore return result until untied tasks are supported.
6280       llvm::Value *Result = CGF.EmitRuntimeCall(
6281           OMPBuilder.getOrCreateRuntimeFunction(
6282               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6283           Args);
6284       // if (__kmpc_cancellationpoint()) {
6285       //   exit from construct;
6286       // }
6287       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6288       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6289       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6290       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6291       CGF.EmitBlock(ExitBB);
6292       // exit from construct;
6293       CodeGenFunction::JumpDest CancelDest =
6294           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6295       CGF.EmitBranchThroughCleanup(CancelDest);
6296       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6297     }
6298   }
6299 }
6300 
6301 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6302                                      const Expr *IfCond,
6303                                      OpenMPDirectiveKind CancelRegion) {
6304   if (!CGF.HaveInsertPoint())
6305     return;
6306   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6307   // kmp_int32 cncl_kind);
6308   auto &M = CGM.getModule();
6309   if (auto *OMPRegionInfo =
6310           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6311     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6312                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6313       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6314       llvm::Value *Args[] = {
6315           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6316           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6317       // Ignore return result until untied tasks are supported.
6318       llvm::Value *Result = CGF.EmitRuntimeCall(
6319           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6320       // if (__kmpc_cancel()) {
6321       //   exit from construct;
6322       // }
6323       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6324       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6325       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6326       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6327       CGF.EmitBlock(ExitBB);
6328       // exit from construct;
6329       CodeGenFunction::JumpDest CancelDest =
6330           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6331       CGF.EmitBranchThroughCleanup(CancelDest);
6332       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6333     };
6334     if (IfCond) {
6335       emitIfClause(CGF, IfCond, ThenGen,
6336                    [](CodeGenFunction &, PrePostActionTy &) {});
6337     } else {
6338       RegionCodeGenTy ThenRCG(ThenGen);
6339       ThenRCG(CGF);
6340     }
6341   }
6342 }
6343 
6344 namespace {
6345 /// Cleanup action for uses_allocators support.
6346 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6347   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6348 
6349 public:
6350   OMPUsesAllocatorsActionTy(
6351       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6352       : Allocators(Allocators) {}
6353   void Enter(CodeGenFunction &CGF) override {
6354     if (!CGF.HaveInsertPoint())
6355       return;
6356     for (const auto &AllocatorData : Allocators) {
6357       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6358           CGF, AllocatorData.first, AllocatorData.second);
6359     }
6360   }
6361   void Exit(CodeGenFunction &CGF) override {
6362     if (!CGF.HaveInsertPoint())
6363       return;
6364     for (const auto &AllocatorData : Allocators) {
6365       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6366                                                         AllocatorData.first);
6367     }
6368   }
6369 };
6370 } // namespace
6371 
6372 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6373     const OMPExecutableDirective &D, StringRef ParentName,
6374     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6375     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6376   assert(!ParentName.empty() && "Invalid target region parent name!");
6377   HasEmittedTargetRegion = true;
6378   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6379   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6380     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6381       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6382       if (!D.AllocatorTraits)
6383         continue;
6384       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6385     }
6386   }
6387   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6388   CodeGen.setAction(UsesAllocatorAction);
6389   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6390                                    IsOffloadEntry, CodeGen);
6391 }
6392 
6393 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6394                                              const Expr *Allocator,
6395                                              const Expr *AllocatorTraits) {
6396   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6397   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6398   // Use default memspace handle.
6399   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6400   llvm::Value *NumTraits = llvm::ConstantInt::get(
6401       CGF.IntTy, cast<ConstantArrayType>(
6402                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6403                      ->getSize()
6404                      .getLimitedValue());
6405   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6406   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6407       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6408   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6409                                            AllocatorTraitsLVal.getBaseInfo(),
6410                                            AllocatorTraitsLVal.getTBAAInfo());
6411   llvm::Value *Traits =
6412       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6413 
6414   llvm::Value *AllocatorVal =
6415       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6416                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6417                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6418   // Store to allocator.
6419   CGF.EmitVarDecl(*cast<VarDecl>(
6420       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6421   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6422   AllocatorVal =
6423       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6424                                Allocator->getType(), Allocator->getExprLoc());
6425   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6426 }
6427 
6428 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6429                                              const Expr *Allocator) {
6430   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6431   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6432   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6433   llvm::Value *AllocatorVal =
6434       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6435   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6436                                           CGF.getContext().VoidPtrTy,
6437                                           Allocator->getExprLoc());
6438   (void)CGF.EmitRuntimeCall(
6439       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6440                                             OMPRTL___kmpc_destroy_allocator),
6441       {ThreadId, AllocatorVal});
6442 }
6443 
6444 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6445     const OMPExecutableDirective &D, StringRef ParentName,
6446     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6447     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6448   // Create a unique name for the entry function using the source location
6449   // information of the current target region. The name will be something like:
6450   //
6451   // __omp_offloading_DD_FFFF_PP_lBB
6452   //
6453   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6454   // mangled name of the function that encloses the target region and BB is the
6455   // line number of the target region.
6456 
6457   unsigned DeviceID;
6458   unsigned FileID;
6459   unsigned Line;
6460   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6461                            Line);
6462   SmallString<64> EntryFnName;
6463   {
6464     llvm::raw_svector_ostream OS(EntryFnName);
6465     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6466        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6467   }
6468 
6469   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6470 
6471   CodeGenFunction CGF(CGM, true);
6472   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6473   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6474 
6475   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6476 
6477   // If this target outline function is not an offload entry, we don't need to
6478   // register it.
6479   if (!IsOffloadEntry)
6480     return;
6481 
6482   // The target region ID is used by the runtime library to identify the current
6483   // target region, so it only has to be unique and not necessarily point to
6484   // anything. It could be the pointer to the outlined function that implements
6485   // the target region, but we aren't using that so that the compiler doesn't
6486   // need to keep that, and could therefore inline the host function if proven
6487   // worthwhile during optimization. In the other hand, if emitting code for the
6488   // device, the ID has to be the function address so that it can retrieved from
6489   // the offloading entry and launched by the runtime library. We also mark the
6490   // outlined function to have external linkage in case we are emitting code for
6491   // the device, because these functions will be entry points to the device.
6492 
6493   if (CGM.getLangOpts().OpenMPIsDevice) {
6494     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6495     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6496     OutlinedFn->setDSOLocal(false);
6497     if (CGM.getTriple().isAMDGCN())
6498       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6499   } else {
6500     std::string Name = getName({EntryFnName, "region_id"});
6501     OutlinedFnID = new llvm::GlobalVariable(
6502         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6503         llvm::GlobalValue::WeakAnyLinkage,
6504         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6505   }
6506 
6507   // Register the information for the entry associated with this target region.
6508   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6509       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6510       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6511 }
6512 
6513 /// Checks if the expression is constant or does not have non-trivial function
6514 /// calls.
6515 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6516   // We can skip constant expressions.
6517   // We can skip expressions with trivial calls or simple expressions.
6518   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6519           !E->hasNonTrivialCall(Ctx)) &&
6520          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6521 }
6522 
6523 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6524                                                     const Stmt *Body) {
6525   const Stmt *Child = Body->IgnoreContainers();
6526   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6527     Child = nullptr;
6528     for (const Stmt *S : C->body()) {
6529       if (const auto *E = dyn_cast<Expr>(S)) {
6530         if (isTrivial(Ctx, E))
6531           continue;
6532       }
6533       // Some of the statements can be ignored.
6534       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6535           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6536         continue;
6537       // Analyze declarations.
6538       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6539         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6540               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6541                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6542                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6543                   isa<UsingDirectiveDecl>(D) ||
6544                   isa<OMPDeclareReductionDecl>(D) ||
6545                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6546                 return true;
6547               const auto *VD = dyn_cast<VarDecl>(D);
6548               if (!VD)
6549                 return false;
6550               return VD->isConstexpr() ||
6551                      ((VD->getType().isTrivialType(Ctx) ||
6552                        VD->getType()->isReferenceType()) &&
6553                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6554             }))
6555           continue;
6556       }
6557       // Found multiple children - cannot get the one child only.
6558       if (Child)
6559         return nullptr;
6560       Child = S;
6561     }
6562     if (Child)
6563       Child = Child->IgnoreContainers();
6564   }
6565   return Child;
6566 }
6567 
6568 /// Emit the number of teams for a target directive.  Inspect the num_teams
6569 /// clause associated with a teams construct combined or closely nested
6570 /// with the target directive.
6571 ///
6572 /// Emit a team of size one for directives such as 'target parallel' that
6573 /// have no associated teams construct.
6574 ///
6575 /// Otherwise, return nullptr.
6576 static llvm::Value *
6577 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6578                                const OMPExecutableDirective &D) {
6579   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6580          "Clauses associated with the teams directive expected to be emitted "
6581          "only for the host!");
6582   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6583   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6584          "Expected target-based executable directive.");
6585   CGBuilderTy &Bld = CGF.Builder;
6586   switch (DirectiveKind) {
6587   case OMPD_target: {
6588     const auto *CS = D.getInnermostCapturedStmt();
6589     const auto *Body =
6590         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6591     const Stmt *ChildStmt =
6592         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6593     if (const auto *NestedDir =
6594             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6595       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6596         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6597           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6598           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6599           const Expr *NumTeams =
6600               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6601           llvm::Value *NumTeamsVal =
6602               CGF.EmitScalarExpr(NumTeams,
6603                                  /*IgnoreResultAssign*/ true);
6604           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6605                                    /*isSigned=*/true);
6606         }
6607         return Bld.getInt32(0);
6608       }
6609       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6610           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6611         return Bld.getInt32(1);
6612       return Bld.getInt32(0);
6613     }
6614     return nullptr;
6615   }
6616   case OMPD_target_teams:
6617   case OMPD_target_teams_distribute:
6618   case OMPD_target_teams_distribute_simd:
6619   case OMPD_target_teams_distribute_parallel_for:
6620   case OMPD_target_teams_distribute_parallel_for_simd: {
6621     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6622       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6623       const Expr *NumTeams =
6624           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6625       llvm::Value *NumTeamsVal =
6626           CGF.EmitScalarExpr(NumTeams,
6627                              /*IgnoreResultAssign*/ true);
6628       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6629                                /*isSigned=*/true);
6630     }
6631     return Bld.getInt32(0);
6632   }
6633   case OMPD_target_parallel:
6634   case OMPD_target_parallel_for:
6635   case OMPD_target_parallel_for_simd:
6636   case OMPD_target_simd:
6637     return Bld.getInt32(1);
6638   case OMPD_parallel:
6639   case OMPD_for:
6640   case OMPD_parallel_for:
6641   case OMPD_parallel_master:
6642   case OMPD_parallel_sections:
6643   case OMPD_for_simd:
6644   case OMPD_parallel_for_simd:
6645   case OMPD_cancel:
6646   case OMPD_cancellation_point:
6647   case OMPD_ordered:
6648   case OMPD_threadprivate:
6649   case OMPD_allocate:
6650   case OMPD_task:
6651   case OMPD_simd:
6652   case OMPD_tile:
6653   case OMPD_sections:
6654   case OMPD_section:
6655   case OMPD_single:
6656   case OMPD_master:
6657   case OMPD_critical:
6658   case OMPD_taskyield:
6659   case OMPD_barrier:
6660   case OMPD_taskwait:
6661   case OMPD_taskgroup:
6662   case OMPD_atomic:
6663   case OMPD_flush:
6664   case OMPD_depobj:
6665   case OMPD_scan:
6666   case OMPD_teams:
6667   case OMPD_target_data:
6668   case OMPD_target_exit_data:
6669   case OMPD_target_enter_data:
6670   case OMPD_distribute:
6671   case OMPD_distribute_simd:
6672   case OMPD_distribute_parallel_for:
6673   case OMPD_distribute_parallel_for_simd:
6674   case OMPD_teams_distribute:
6675   case OMPD_teams_distribute_simd:
6676   case OMPD_teams_distribute_parallel_for:
6677   case OMPD_teams_distribute_parallel_for_simd:
6678   case OMPD_target_update:
6679   case OMPD_declare_simd:
6680   case OMPD_declare_variant:
6681   case OMPD_begin_declare_variant:
6682   case OMPD_end_declare_variant:
6683   case OMPD_declare_target:
6684   case OMPD_end_declare_target:
6685   case OMPD_declare_reduction:
6686   case OMPD_declare_mapper:
6687   case OMPD_taskloop:
6688   case OMPD_taskloop_simd:
6689   case OMPD_master_taskloop:
6690   case OMPD_master_taskloop_simd:
6691   case OMPD_parallel_master_taskloop:
6692   case OMPD_parallel_master_taskloop_simd:
6693   case OMPD_requires:
6694   case OMPD_unknown:
6695     break;
6696   default:
6697     break;
6698   }
6699   llvm_unreachable("Unexpected directive kind.");
6700 }
6701 
6702 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6703                                   llvm::Value *DefaultThreadLimitVal) {
6704   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6705       CGF.getContext(), CS->getCapturedStmt());
6706   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6707     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6708       llvm::Value *NumThreads = nullptr;
6709       llvm::Value *CondVal = nullptr;
6710       // Handle if clause. If if clause present, the number of threads is
6711       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6712       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6713         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6714         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6715         const OMPIfClause *IfClause = nullptr;
6716         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6717           if (C->getNameModifier() == OMPD_unknown ||
6718               C->getNameModifier() == OMPD_parallel) {
6719             IfClause = C;
6720             break;
6721           }
6722         }
6723         if (IfClause) {
6724           const Expr *Cond = IfClause->getCondition();
6725           bool Result;
6726           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6727             if (!Result)
6728               return CGF.Builder.getInt32(1);
6729           } else {
6730             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6731             if (const auto *PreInit =
6732                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6733               for (const auto *I : PreInit->decls()) {
6734                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6735                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6736                 } else {
6737                   CodeGenFunction::AutoVarEmission Emission =
6738                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6739                   CGF.EmitAutoVarCleanups(Emission);
6740                 }
6741               }
6742             }
6743             CondVal = CGF.EvaluateExprAsBool(Cond);
6744           }
6745         }
6746       }
6747       // Check the value of num_threads clause iff if clause was not specified
6748       // or is not evaluated to false.
6749       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6750         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6751         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6752         const auto *NumThreadsClause =
6753             Dir->getSingleClause<OMPNumThreadsClause>();
6754         CodeGenFunction::LexicalScope Scope(
6755             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6756         if (const auto *PreInit =
6757                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6758           for (const auto *I : PreInit->decls()) {
6759             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6760               CGF.EmitVarDecl(cast<VarDecl>(*I));
6761             } else {
6762               CodeGenFunction::AutoVarEmission Emission =
6763                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6764               CGF.EmitAutoVarCleanups(Emission);
6765             }
6766           }
6767         }
6768         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6769         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6770                                                /*isSigned=*/false);
6771         if (DefaultThreadLimitVal)
6772           NumThreads = CGF.Builder.CreateSelect(
6773               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6774               DefaultThreadLimitVal, NumThreads);
6775       } else {
6776         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6777                                            : CGF.Builder.getInt32(0);
6778       }
6779       // Process condition of the if clause.
6780       if (CondVal) {
6781         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6782                                               CGF.Builder.getInt32(1));
6783       }
6784       return NumThreads;
6785     }
6786     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6787       return CGF.Builder.getInt32(1);
6788     return DefaultThreadLimitVal;
6789   }
6790   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6791                                : CGF.Builder.getInt32(0);
6792 }
6793 
6794 /// Emit the number of threads for a target directive.  Inspect the
6795 /// thread_limit clause associated with a teams construct combined or closely
6796 /// nested with the target directive.
6797 ///
6798 /// Emit the num_threads clause for directives such as 'target parallel' that
6799 /// have no associated teams construct.
6800 ///
6801 /// Otherwise, return nullptr.
6802 static llvm::Value *
6803 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6804                                  const OMPExecutableDirective &D) {
6805   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6806          "Clauses associated with the teams directive expected to be emitted "
6807          "only for the host!");
6808   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6809   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6810          "Expected target-based executable directive.");
6811   CGBuilderTy &Bld = CGF.Builder;
6812   llvm::Value *ThreadLimitVal = nullptr;
6813   llvm::Value *NumThreadsVal = nullptr;
6814   switch (DirectiveKind) {
6815   case OMPD_target: {
6816     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6817     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6818       return NumThreads;
6819     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6820         CGF.getContext(), CS->getCapturedStmt());
6821     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6822       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6823         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6824         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6825         const auto *ThreadLimitClause =
6826             Dir->getSingleClause<OMPThreadLimitClause>();
6827         CodeGenFunction::LexicalScope Scope(
6828             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6829         if (const auto *PreInit =
6830                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6831           for (const auto *I : PreInit->decls()) {
6832             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6833               CGF.EmitVarDecl(cast<VarDecl>(*I));
6834             } else {
6835               CodeGenFunction::AutoVarEmission Emission =
6836                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6837               CGF.EmitAutoVarCleanups(Emission);
6838             }
6839           }
6840         }
6841         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6842             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6843         ThreadLimitVal =
6844             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6845       }
6846       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6847           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6848         CS = Dir->getInnermostCapturedStmt();
6849         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6850             CGF.getContext(), CS->getCapturedStmt());
6851         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6852       }
6853       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6854           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6855         CS = Dir->getInnermostCapturedStmt();
6856         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6857           return NumThreads;
6858       }
6859       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6860         return Bld.getInt32(1);
6861     }
6862     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6863   }
6864   case OMPD_target_teams: {
6865     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6866       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6867       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6868       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6869           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6870       ThreadLimitVal =
6871           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6872     }
6873     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6874     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6875       return NumThreads;
6876     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6877         CGF.getContext(), CS->getCapturedStmt());
6878     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6879       if (Dir->getDirectiveKind() == OMPD_distribute) {
6880         CS = Dir->getInnermostCapturedStmt();
6881         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6882           return NumThreads;
6883       }
6884     }
6885     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6886   }
6887   case OMPD_target_teams_distribute:
6888     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6889       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6890       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6891       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6892           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6893       ThreadLimitVal =
6894           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6895     }
6896     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6897   case OMPD_target_parallel:
6898   case OMPD_target_parallel_for:
6899   case OMPD_target_parallel_for_simd:
6900   case OMPD_target_teams_distribute_parallel_for:
6901   case OMPD_target_teams_distribute_parallel_for_simd: {
6902     llvm::Value *CondVal = nullptr;
6903     // Handle if clause. If if clause present, the number of threads is
6904     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6905     if (D.hasClausesOfKind<OMPIfClause>()) {
6906       const OMPIfClause *IfClause = nullptr;
6907       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6908         if (C->getNameModifier() == OMPD_unknown ||
6909             C->getNameModifier() == OMPD_parallel) {
6910           IfClause = C;
6911           break;
6912         }
6913       }
6914       if (IfClause) {
6915         const Expr *Cond = IfClause->getCondition();
6916         bool Result;
6917         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6918           if (!Result)
6919             return Bld.getInt32(1);
6920         } else {
6921           CodeGenFunction::RunCleanupsScope Scope(CGF);
6922           CondVal = CGF.EvaluateExprAsBool(Cond);
6923         }
6924       }
6925     }
6926     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6927       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6928       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6929       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6930           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6931       ThreadLimitVal =
6932           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6933     }
6934     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6935       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6936       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6937       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6938           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6939       NumThreadsVal =
6940           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6941       ThreadLimitVal = ThreadLimitVal
6942                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6943                                                                 ThreadLimitVal),
6944                                               NumThreadsVal, ThreadLimitVal)
6945                            : NumThreadsVal;
6946     }
6947     if (!ThreadLimitVal)
6948       ThreadLimitVal = Bld.getInt32(0);
6949     if (CondVal)
6950       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6951     return ThreadLimitVal;
6952   }
6953   case OMPD_target_teams_distribute_simd:
6954   case OMPD_target_simd:
6955     return Bld.getInt32(1);
6956   case OMPD_parallel:
6957   case OMPD_for:
6958   case OMPD_parallel_for:
6959   case OMPD_parallel_master:
6960   case OMPD_parallel_sections:
6961   case OMPD_for_simd:
6962   case OMPD_parallel_for_simd:
6963   case OMPD_cancel:
6964   case OMPD_cancellation_point:
6965   case OMPD_ordered:
6966   case OMPD_threadprivate:
6967   case OMPD_allocate:
6968   case OMPD_task:
6969   case OMPD_simd:
6970   case OMPD_tile:
6971   case OMPD_sections:
6972   case OMPD_section:
6973   case OMPD_single:
6974   case OMPD_master:
6975   case OMPD_critical:
6976   case OMPD_taskyield:
6977   case OMPD_barrier:
6978   case OMPD_taskwait:
6979   case OMPD_taskgroup:
6980   case OMPD_atomic:
6981   case OMPD_flush:
6982   case OMPD_depobj:
6983   case OMPD_scan:
6984   case OMPD_teams:
6985   case OMPD_target_data:
6986   case OMPD_target_exit_data:
6987   case OMPD_target_enter_data:
6988   case OMPD_distribute:
6989   case OMPD_distribute_simd:
6990   case OMPD_distribute_parallel_for:
6991   case OMPD_distribute_parallel_for_simd:
6992   case OMPD_teams_distribute:
6993   case OMPD_teams_distribute_simd:
6994   case OMPD_teams_distribute_parallel_for:
6995   case OMPD_teams_distribute_parallel_for_simd:
6996   case OMPD_target_update:
6997   case OMPD_declare_simd:
6998   case OMPD_declare_variant:
6999   case OMPD_begin_declare_variant:
7000   case OMPD_end_declare_variant:
7001   case OMPD_declare_target:
7002   case OMPD_end_declare_target:
7003   case OMPD_declare_reduction:
7004   case OMPD_declare_mapper:
7005   case OMPD_taskloop:
7006   case OMPD_taskloop_simd:
7007   case OMPD_master_taskloop:
7008   case OMPD_master_taskloop_simd:
7009   case OMPD_parallel_master_taskloop:
7010   case OMPD_parallel_master_taskloop_simd:
7011   case OMPD_requires:
7012   case OMPD_unknown:
7013     break;
7014   default:
7015     break;
7016   }
7017   llvm_unreachable("Unsupported directive kind.");
7018 }
7019 
7020 namespace {
7021 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7022 
7023 // Utility to handle information from clauses associated with a given
7024 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7025 // It provides a convenient interface to obtain the information and generate
7026 // code for that information.
7027 class MappableExprsHandler {
7028 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are combined with bitwise operators
  /// (the enum is marked as an LLVM bitmask enum below).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags.
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is reserved for compatibility with XLC and is intentionally
    // not assigned to any enumerator.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7076 
7077   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7078   static unsigned getFlagMemberOffset() {
7079     unsigned Offset = 0;
7080     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7081          Remain = Remain >> 1)
7082       Offset++;
7083     return Offset;
7084   }
7085 
7086   /// Class that holds debugging information for a data mapping to be passed to
7087   /// the runtime library.
7088   class MappingExprInfo {
7089     /// The variable declaration used for the data mapping.
7090     const ValueDecl *MapDecl = nullptr;
7091     /// The original expression used in the map clause, or null if there is
7092     /// none.
7093     const Expr *MapExpr = nullptr;
7094 
7095   public:
7096     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7097         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7098 
7099     const ValueDecl *getMapDecl() const { return MapDecl; }
7100     const Expr *getMapExpr() const { return MapExpr; }
7101   };
7102 
7103   /// Class that associates information with a base pointer to be passed to the
7104   /// runtime library.
7105   class BasePointerInfo {
7106     /// The base pointer.
7107     llvm::Value *Ptr = nullptr;
7108     /// The base declaration that refers to this device pointer, or null if
7109     /// there is none.
7110     const ValueDecl *DevPtrDecl = nullptr;
7111 
7112   public:
7113     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7114         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7115     llvm::Value *operator*() const { return Ptr; }
7116     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7117     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7118   };
7119 
  // Container types for the per-entry arrays collected in MapCombinedInfoTy.
  // Each is a SmallVector with inline storage for four elements.

  /// Debug information entries (MapCombinedInfoTy::Exprs).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Base pointer entries (MapCombinedInfoTy::BasePointers).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Plain LLVM values; used for MapCombinedInfoTy::Pointers and ::Sizes and
  /// as the element type of MapNonContiguousArrayTy.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Mapping flag entries (MapCombinedInfoTy::Types).
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// User-defined mapper declarations (MapCombinedInfoTy::Mappers).
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Dimension entries for non-contiguous information (NonContigInfo.Dims).
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// Array of value arrays; used for the Offsets, Counts and Strides of the
  /// non-contiguous information.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7127 
7128   /// This structure contains combined information generated for mappable
7129   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7130   /// mappers, and non-contiguous information.
7131   struct MapCombinedInfoTy {
7132     struct StructNonContiguousInfo {
7133       bool IsNonContiguous = false;
7134       MapDimArrayTy Dims;
7135       MapNonContiguousArrayTy Offsets;
7136       MapNonContiguousArrayTy Counts;
7137       MapNonContiguousArrayTy Strides;
7138     };
7139     MapExprsArrayTy Exprs;
7140     MapBaseValuesArrayTy BasePointers;
7141     MapValuesArrayTy Pointers;
7142     MapValuesArrayTy Sizes;
7143     MapFlagsArrayTy Types;
7144     MapMappersArrayTy Mappers;
7145     StructNonContiguousInfo NonContigInfo;
7146 
7147     /// Append arrays in \a CurInfo.
7148     void append(MapCombinedInfoTy &CurInfo) {
7149       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7150       BasePointers.append(CurInfo.BasePointers.begin(),
7151                           CurInfo.BasePointers.end());
7152       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7153       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7154       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7155       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7156       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7157                                  CurInfo.NonContigInfo.Dims.end());
7158       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7159                                     CurInfo.NonContigInfo.Offsets.end());
7160       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7161                                    CurInfo.NonContigInfo.Counts.end());
7162       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7163                                     CurInfo.NonContigInfo.Strides.end());
7164     }
7165   };
7166 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map data collected for the struct (see MapCombinedInfoTy).
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element (LE) as a (field index, address) pair. Starts out
    /// as field index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element (HE) as a (field index, address) pair. Starts
    /// out as field index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Invalid until set. NOTE(review): presumably the base address of the
    /// struct -- confirm against the code that fills this in.
    Address Base = Address::invalid();
    /// Invalid until set. NOTE(review): presumably a lower-bound address --
    /// confirm against the code that fills this in.
    Address LB = Address::invalid();
    /// False by default. NOTE(review): presumably set when the struct is
    /// reached through an array section -- confirm at the point of use.
    bool IsArraySection = false;
    /// False by default. NOTE(review): presumably set when the whole record is
    /// mapped -- confirm at the point of use.
    bool HasCompleteRecord = false;
  };
7182 
7183 private:
7184   /// Kind that defines how a device pointer has to be returned.
7185   struct MapInfo {
7186     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7187     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7188     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7189     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7190     bool ReturnDevicePointer = false;
7191     bool IsImplicit = false;
7192     const ValueDecl *Mapper = nullptr;
7193     const Expr *VarRef = nullptr;
7194     bool ForDeviceAddr = false;
7195 
7196     MapInfo() = default;
7197     MapInfo(
7198         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7199         OpenMPMapClauseKind MapType,
7200         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7201         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7202         bool ReturnDevicePointer, bool IsImplicit,
7203         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7204         bool ForDeviceAddr = false)
7205         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7206           MotionModifiers(MotionModifiers),
7207           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7208           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7209   };
7210 
7211   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7212   /// member and there is no map information about it, then emission of that
7213   /// entry is deferred until the whole struct has been processed.
7214   struct DeferredDevicePtrEntryTy {
7215     const Expr *IE = nullptr;
7216     const ValueDecl *VD = nullptr;
7217     bool ForDeviceAddr = false;
7218 
7219     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7220                              bool ForDeviceAddr)
7221         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7222   };
7223 
7224   /// The target directive from where the mappable clauses were extracted. It
7225   /// is either a executable directive or a user-defined mapper directive.
7226   llvm::PointerUnion<const OMPExecutableDirective *,
7227                      const OMPDeclareMapperDecl *>
7228       CurDir;
7229 
7230   /// Function the directive is being generated for.
7231   CodeGenFunction &CGF;
7232 
7233   /// Set of all first private variables in the current directive.
7234   /// bool data is set to true if the variable is implicitly marked as
7235   /// firstprivate, false otherwise.
7236   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7237 
7238   /// Map between device pointer declarations and their expression components.
7239   /// The key value for declarations in 'this' is null.
7240   llvm::DenseMap<
7241       const ValueDecl *,
7242       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7243       DevPointersMap;
7244 
7245   llvm::Value *getExprTypeSize(const Expr *E) const {
7246     QualType ExprTy = E->getType().getCanonicalType();
7247 
7248     // Calculate the size for array shaping expression.
7249     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7250       llvm::Value *Size =
7251           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7252       for (const Expr *SE : OAE->getDimensions()) {
7253         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7254         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7255                                       CGF.getContext().getSizeType(),
7256                                       SE->getExprLoc());
7257         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7258       }
7259       return Size;
7260     }
7261 
7262     // Reference types are ignored for mapping purposes.
7263     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7264       ExprTy = RefTy->getPointeeType().getCanonicalType();
7265 
7266     // Given that an array section is considered a built-in type, we need to
7267     // do the calculation based on the length of the section instead of relying
7268     // on CGF.getTypeSize(E->getType()).
7269     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7270       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7271                             OAE->getBase()->IgnoreParenImpCasts())
7272                             .getCanonicalType();
7273 
7274       // If there is no length associated with the expression and lower bound is
7275       // not specified too, that means we are using the whole length of the
7276       // base.
7277       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7278           !OAE->getLowerBound())
7279         return CGF.getTypeSize(BaseTy);
7280 
7281       llvm::Value *ElemSize;
7282       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7283         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7284       } else {
7285         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7286         assert(ATy && "Expecting array type if not a pointer type.");
7287         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7288       }
7289 
7290       // If we don't have a length at this point, that is because we have an
7291       // array section with a single element.
7292       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7293         return ElemSize;
7294 
7295       if (const Expr *LenExpr = OAE->getLength()) {
7296         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7297         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7298                                              CGF.getContext().getSizeType(),
7299                                              LenExpr->getExprLoc());
7300         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7301       }
7302       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7303              OAE->getLowerBound() && "expected array_section[lb:].");
7304       // Size = sizetype - lb * elemtype;
7305       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7306       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7307       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7308                                        CGF.getContext().getSizeType(),
7309                                        OAE->getLowerBound()->getExprLoc());
7310       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7311       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7312       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7313       LengthVal = CGF.Builder.CreateSelect(
7314           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7315       return LengthVal;
7316     }
7317     return CGF.getTypeSize(ExprTy);
7318   }
7319 
7320   /// Return the corresponding bits for a given map clause modifier. Add
7321   /// a flag marking the map as a pointer if requested. Add a flag marking the
7322   /// map as the first one of a series of maps that relate to the same map
7323   /// expression.
7324   OpenMPOffloadMappingFlags getMapTypeBits(
7325       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7326       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7327       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7328     OpenMPOffloadMappingFlags Bits =
7329         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7330     switch (MapType) {
7331     case OMPC_MAP_alloc:
7332     case OMPC_MAP_release:
7333       // alloc and release is the default behavior in the runtime library,  i.e.
7334       // if we don't pass any bits alloc/release that is what the runtime is
7335       // going to do. Therefore, we don't need to signal anything for these two
7336       // type modifiers.
7337       break;
7338     case OMPC_MAP_to:
7339       Bits |= OMP_MAP_TO;
7340       break;
7341     case OMPC_MAP_from:
7342       Bits |= OMP_MAP_FROM;
7343       break;
7344     case OMPC_MAP_tofrom:
7345       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7346       break;
7347     case OMPC_MAP_delete:
7348       Bits |= OMP_MAP_DELETE;
7349       break;
7350     case OMPC_MAP_unknown:
7351       llvm_unreachable("Unexpected map type!");
7352     }
7353     if (AddPtrFlag)
7354       Bits |= OMP_MAP_PTR_AND_OBJ;
7355     if (AddIsTargetParamFlag)
7356       Bits |= OMP_MAP_TARGET_PARAM;
7357     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7358         != MapModifiers.end())
7359       Bits |= OMP_MAP_ALWAYS;
7360     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7361         != MapModifiers.end())
7362       Bits |= OMP_MAP_CLOSE;
7363     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7364             MapModifiers.end() ||
7365         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7366             MotionModifiers.end())
7367       Bits |= OMP_MAP_PRESENT;
7368     if (IsNonContiguous)
7369       Bits |= OMP_MAP_NON_CONTIG;
7370     return Bits;
7371   }
7372 
7373   /// Return true if the provided expression is a final array section. A
7374   /// final array section, is one whose length can't be proved to be one.
7375   bool isFinalArraySectionExpression(const Expr *E) const {
7376     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7377 
7378     // It is not an array section and therefore not a unity-size one.
7379     if (!OASE)
7380       return false;
7381 
7382     // An array section with no colon always refer to a single element.
7383     if (OASE->getColonLocFirst().isInvalid())
7384       return false;
7385 
7386     const Expr *Length = OASE->getLength();
7387 
7388     // If we don't have a length we have to check if the array has size 1
7389     // for this dimension. Also, we should always expect a length if the
7390     // base type is pointer.
7391     if (!Length) {
7392       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7393                              OASE->getBase()->IgnoreParenImpCasts())
7394                              .getCanonicalType();
7395       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7396         return ATy->getSize().getSExtValue() != 1;
7397       // If we don't have a constant dimension length, we have to consider
7398       // the current section as having any size, so it is not necessarily
7399       // unitary. If it happen to be unity size, that's user fault.
7400       return true;
7401     }
7402 
7403     // Check if the length evaluates to 1.
7404     Expr::EvalResult Result;
7405     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7406       return true; // Can have more that size 1.
7407 
7408     llvm::APSInt ConstLength = Result.Val.getInt();
7409     return ConstLength.getSExtValue() != 1;
7410   }
7411 
7412   /// Generate the base pointers, section pointers, sizes, map type bits, and
7413   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7414   /// map type, map or motion modifiers, and expression components.
7415   /// \a IsFirstComponent should be set to true if the provided set of
7416   /// components is the first associated with a capture.
7417   void generateInfoForComponentList(
7418       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7419       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7420       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7421       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7422       bool IsFirstComponentList, bool IsImplicit,
7423       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7424       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7425       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7426           OverlappedElements = llvm::None) const {
7427     // The following summarizes what has to be generated for each map and the
7428     // types below. The generated information is expressed in this order:
7429     // base pointer, section pointer, size, flags
7430     // (to add to the ones that come from the map type and modifier).
7431     //
7432     // double d;
7433     // int i[100];
7434     // float *p;
7435     //
7436     // struct S1 {
7437     //   int i;
7438     //   float f[50];
7439     // }
7440     // struct S2 {
7441     //   int i;
7442     //   float f[50];
7443     //   S1 s;
7444     //   double *p;
7445     //   struct S2 *ps;
7446     //   int &ref;
7447     // }
7448     // S2 s;
7449     // S2 *ps;
7450     //
7451     // map(d)
7452     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7453     //
7454     // map(i)
7455     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7456     //
7457     // map(i[1:23])
7458     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7459     //
7460     // map(p)
7461     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7462     //
7463     // map(p[1:24])
7464     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7465     // in unified shared memory mode or for local pointers
7466     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7467     //
7468     // map(s)
7469     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7470     //
7471     // map(s.i)
7472     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7473     //
7474     // map(s.s.f)
7475     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7476     //
7477     // map(s.p)
7478     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7479     //
7480     // map(to: s.p[:22])
7481     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7482     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7483     // &(s.p), &(s.p[0]), 22*sizeof(double),
7484     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7485     // (*) alloc space for struct members, only this is a target parameter
7486     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7487     //      optimizes this entry out, same in the examples below)
7488     // (***) map the pointee (map: to)
7489     //
7490     // map(to: s.ref)
7491     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7492     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7493     // (*) alloc space for struct members, only this is a target parameter
7494     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7495     //      optimizes this entry out, same in the examples below)
7496     // (***) map the pointee (map: to)
7497     //
7498     // map(s.ps)
7499     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7500     //
7501     // map(from: s.ps->s.i)
7502     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7503     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7504     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7505     //
7506     // map(to: s.ps->ps)
7507     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7508     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7509     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7510     //
7511     // map(s.ps->ps->ps)
7512     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7513     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7514     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7515     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7516     //
7517     // map(to: s.ps->ps->s.f[:22])
7518     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7519     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7520     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7521     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7522     //
7523     // map(ps)
7524     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7525     //
7526     // map(ps->i)
7527     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7528     //
7529     // map(ps->s.f)
7530     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7531     //
7532     // map(from: ps->p)
7533     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7534     //
7535     // map(to: ps->p[:22])
7536     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7537     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7538     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7539     //
7540     // map(ps->ps)
7541     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7542     //
7543     // map(from: ps->ps->s.i)
7544     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7545     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7546     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7547     //
7548     // map(from: ps->ps->ps)
7549     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7550     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7551     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7552     //
7553     // map(ps->ps->ps->ps)
7554     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7555     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7556     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7557     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7558     //
7559     // map(to: ps->ps->ps->s.f[:22])
7560     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7561     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7562     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7563     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7564     //
7565     // map(to: s.f[:22]) map(from: s.p[:33])
7566     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7567     //     sizeof(double*) (**), TARGET_PARAM
7568     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7569     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7570     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7571     // (*) allocate contiguous space needed to fit all mapped members even if
7572     //     we allocate space for members not mapped (in this example,
7573     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7574     //     them as well because they fall between &s.f[0] and &s.p)
7575     //
7576     // map(from: s.f[:22]) map(to: ps->p[:33])
7577     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7578     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7579     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7580     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7581     // (*) the struct this entry pertains to is the 2nd element in the list of
7582     //     arguments, hence MEMBER_OF(2)
7583     //
7584     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7585     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7586     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7587     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7588     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7589     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7590     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7591     // (*) the struct this entry pertains to is the 4th element in the list
7592     //     of arguments, hence MEMBER_OF(4)
7593 
7594     // Track if the map information being generated is the first for a capture.
7595     bool IsCaptureFirstInfo = IsFirstComponentList;
7596     // When the variable is on a declare target link or in a to clause with
7597     // unified memory, a reference is needed to hold the host/device address
7598     // of the variable.
7599     bool RequiresReference = false;
7600 
7601     // Scan the components from the base to the complete expression.
7602     auto CI = Components.rbegin();
7603     auto CE = Components.rend();
7604     auto I = CI;
7605 
7606     // Track if the map information being generated is the first for a list of
7607     // components.
7608     bool IsExpressionFirstInfo = true;
7609     bool FirstPointerInComplexData = false;
7610     Address BP = Address::invalid();
7611     const Expr *AssocExpr = I->getAssociatedExpression();
7612     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7613     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7614     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7615 
7616     if (isa<MemberExpr>(AssocExpr)) {
7617       // The base is the 'this' pointer. The content of the pointer is going
7618       // to be the base of the field being mapped.
7619       BP = CGF.LoadCXXThisAddress();
7620     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7621                (OASE &&
7622                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7623       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7624     } else if (OAShE &&
7625                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7626       BP = Address(
7627           CGF.EmitScalarExpr(OAShE->getBase()),
7628           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7629     } else {
7630       // The base is the reference to the variable.
7631       // BP = &Var.
7632       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7633       if (const auto *VD =
7634               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7635         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7636                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7637           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7638               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7639                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7640             RequiresReference = true;
7641             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7642           }
7643         }
7644       }
7645 
7646       // If the variable is a pointer and is being dereferenced (i.e. is not
7647       // the last component), the base has to be the pointer itself, not its
7648       // reference. References are ignored for mapping purposes.
7649       QualType Ty =
7650           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7651       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7652         // No need to generate individual map information for the pointer, it
7653         // can be associated with the combined storage if shared memory mode is
7654         // active or the base declaration is not global variable.
7655         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7656         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7657             !VD || VD->hasLocalStorage())
7658           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7659         else
7660           FirstPointerInComplexData = true;
7661         ++I;
7662       }
7663     }
7664 
7665     // Track whether a component of the list should be marked as MEMBER_OF some
7666     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7667     // in a component list should be marked as MEMBER_OF, all subsequent entries
7668     // do not belong to the base struct. E.g.
7669     // struct S2 s;
7670     // s.ps->ps->ps->f[:]
7671     //   (1) (2) (3) (4)
7672     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7673     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7674     // is the pointee of ps(2) which is not member of struct s, so it should not
7675     // be marked as such (it is still PTR_AND_OBJ).
7676     // The variable is initialized to false so that PTR_AND_OBJ entries which
7677     // are not struct members are not considered (e.g. array of pointers to
7678     // data).
7679     bool ShouldBeMemberOf = false;
7680 
7681     // Variable keeping track of whether or not we have encountered a component
7682     // in the component list which is a member expression. Useful when we have a
7683     // pointer or a final array section, in which case it is the previous
7684     // component in the list which tells us whether we have a member expression.
7685     // E.g. X.f[:]
7686     // While processing the final array section "[:]" it is "f" which tells us
7687     // whether we are dealing with a member of a declared struct.
7688     const MemberExpr *EncounteredME = nullptr;
7689 
7690     // Track for the total number of dimension. Start from one for the dummy
7691     // dimension.
7692     uint64_t DimSize = 1;
7693 
7694     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7695     bool IsPrevMemberReference = false;
7696 
7697     for (; I != CE; ++I) {
7698       // If the current component is member of a struct (parent struct) mark it.
7699       if (!EncounteredME) {
7700         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7701         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7702         // as MEMBER_OF the parent struct.
7703         if (EncounteredME) {
7704           ShouldBeMemberOf = true;
7705           // Do not emit as complex pointer if this is actually not array-like
7706           // expression.
7707           if (FirstPointerInComplexData) {
7708             QualType Ty = std::prev(I)
7709                               ->getAssociatedDeclaration()
7710                               ->getType()
7711                               .getNonReferenceType();
7712             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7713             FirstPointerInComplexData = false;
7714           }
7715         }
7716       }
7717 
7718       auto Next = std::next(I);
7719 
7720       // We need to generate the addresses and sizes if this is the last
7721       // component, if the component is a pointer or if it is an array section
7722       // whose length can't be proved to be one. If this is a pointer, it
7723       // becomes the base address for the following components.
7724 
7725       // A final array section, is one whose length can't be proved to be one.
7726       // If the map item is non-contiguous then we don't treat any array section
7727       // as final array section.
7728       bool IsFinalArraySection =
7729           !IsNonContiguous &&
7730           isFinalArraySectionExpression(I->getAssociatedExpression());
7731 
7732       // If we have a declaration for the mapping use that, otherwise use
7733       // the base declaration of the map clause.
7734       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7735                                      ? I->getAssociatedDeclaration()
7736                                      : BaseDecl;
7737 
7738       // Get information on whether the element is a pointer. Have to do a
7739       // special treatment for array sections given that they are built-in
7740       // types.
7741       const auto *OASE =
7742           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7743       const auto *OAShE =
7744           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7745       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7746       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7747       bool IsPointer =
7748           OAShE ||
7749           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7750                        .getCanonicalType()
7751                        ->isAnyPointerType()) ||
7752           I->getAssociatedExpression()->getType()->isAnyPointerType();
7753       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7754                                MapDecl &&
7755                                MapDecl->getType()->isLValueReferenceType();
7756       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7757 
7758       if (OASE)
7759         ++DimSize;
7760 
7761       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7762           IsFinalArraySection) {
7763         // If this is not the last component, we expect the pointer to be
7764         // associated with an array expression or member expression.
7765         assert((Next == CE ||
7766                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7767                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7768                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7769                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7770                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7771                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7772                "Unexpected expression");
7773 
7774         Address LB = Address::invalid();
7775         Address LowestElem = Address::invalid();
7776         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7777                                        const MemberExpr *E) {
7778           const Expr *BaseExpr = E->getBase();
7779           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7780           // scalar.
7781           LValue BaseLV;
7782           if (E->isArrow()) {
7783             LValueBaseInfo BaseInfo;
7784             TBAAAccessInfo TBAAInfo;
7785             Address Addr =
7786                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7787             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7788             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7789           } else {
7790             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7791           }
7792           return BaseLV;
7793         };
7794         if (OAShE) {
7795           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7796                                     CGF.getContext().getTypeAlignInChars(
7797                                         OAShE->getBase()->getType()));
7798         } else if (IsMemberReference) {
7799           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7800           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7801           LowestElem = CGF.EmitLValueForFieldInitialization(
7802                               BaseLVal, cast<FieldDecl>(MapDecl))
7803                            .getAddress(CGF);
7804           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7805                    .getAddress(CGF);
7806         } else {
7807           LowestElem = LB =
7808               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7809                   .getAddress(CGF);
7810         }
7811 
7812         // If this component is a pointer inside the base struct then we don't
7813         // need to create any entry for it - it will be combined with the object
7814         // it is pointing to into a single PTR_AND_OBJ entry.
7815         bool IsMemberPointerOrAddr =
7816             EncounteredME &&
7817             (((IsPointer || ForDeviceAddr) &&
7818               I->getAssociatedExpression() == EncounteredME) ||
7819              (IsPrevMemberReference && !IsPointer) ||
7820              (IsMemberReference && Next != CE &&
7821               !Next->getAssociatedExpression()->getType()->isPointerType()));
7822         if (!OverlappedElements.empty() && Next == CE) {
7823           // Handle base element with the info for overlapped elements.
7824           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7825           assert(!IsPointer &&
7826                  "Unexpected base element with the pointer type.");
7827           // Mark the whole struct as the struct that requires allocation on the
7828           // device.
7829           PartialStruct.LowestElem = {0, LowestElem};
7830           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7831               I->getAssociatedExpression()->getType());
7832           Address HB = CGF.Builder.CreateConstGEP(
7833               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7834                                                               CGF.VoidPtrTy),
7835               TypeSize.getQuantity() - 1);
7836           PartialStruct.HighestElem = {
7837               std::numeric_limits<decltype(
7838                   PartialStruct.HighestElem.first)>::max(),
7839               HB};
7840           PartialStruct.Base = BP;
7841           PartialStruct.LB = LB;
7842           assert(
7843               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7844               "Overlapped elements must be used only once for the variable.");
7845           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7846           // Emit data for non-overlapped data.
7847           OpenMPOffloadMappingFlags Flags =
7848               OMP_MAP_MEMBER_OF |
7849               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7850                              /*AddPtrFlag=*/false,
7851                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7852           llvm::Value *Size = nullptr;
7853           // Do bitcopy of all non-overlapped structure elements.
7854           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7855                    Component : OverlappedElements) {
7856             Address ComponentLB = Address::invalid();
7857             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7858                  Component) {
7859               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7860                 const auto *FD = dyn_cast<FieldDecl>(VD);
7861                 if (FD && FD->getType()->isLValueReferenceType()) {
7862                   const auto *ME =
7863                       cast<MemberExpr>(MC.getAssociatedExpression());
7864                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7865                   ComponentLB =
7866                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7867                           .getAddress(CGF);
7868                 } else {
7869                   ComponentLB =
7870                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7871                           .getAddress(CGF);
7872                 }
7873                 Size = CGF.Builder.CreatePtrDiff(
7874                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7875                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7876                 break;
7877               }
7878             }
7879             assert(Size && "Failed to determine structure size");
7880             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7881             CombinedInfo.BasePointers.push_back(BP.getPointer());
7882             CombinedInfo.Pointers.push_back(LB.getPointer());
7883             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7884                 Size, CGF.Int64Ty, /*isSigned=*/true));
7885             CombinedInfo.Types.push_back(Flags);
7886             CombinedInfo.Mappers.push_back(nullptr);
7887             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7888                                                                       : 1);
7889             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7890           }
7891           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7892           CombinedInfo.BasePointers.push_back(BP.getPointer());
7893           CombinedInfo.Pointers.push_back(LB.getPointer());
7894           Size = CGF.Builder.CreatePtrDiff(
7895               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7896               CGF.EmitCastToVoidPtr(LB.getPointer()));
7897           CombinedInfo.Sizes.push_back(
7898               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7899           CombinedInfo.Types.push_back(Flags);
7900           CombinedInfo.Mappers.push_back(nullptr);
7901           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7902                                                                     : 1);
7903           break;
7904         }
7905         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7906         if (!IsMemberPointerOrAddr ||
7907             (Next == CE && MapType != OMPC_MAP_unknown)) {
7908           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7909           CombinedInfo.BasePointers.push_back(BP.getPointer());
7910           CombinedInfo.Pointers.push_back(LB.getPointer());
7911           CombinedInfo.Sizes.push_back(
7912               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7913           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7914                                                                     : 1);
7915 
7916           // If Mapper is valid, the last component inherits the mapper.
7917           bool HasMapper = Mapper && Next == CE;
7918           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7919 
7920           // We need to add a pointer flag for each map that comes from the
7921           // same expression except for the first one. We also need to signal
7922           // this map is the first one that relates with the current capture
7923           // (there is a set of entries for each capture).
7924           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7925               MapType, MapModifiers, MotionModifiers, IsImplicit,
7926               !IsExpressionFirstInfo || RequiresReference ||
7927                   FirstPointerInComplexData || IsMemberReference,
7928               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7929 
7930           if (!IsExpressionFirstInfo || IsMemberReference) {
7931             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7932             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7933             if (IsPointer || (IsMemberReference && Next != CE))
7934               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7935                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7936 
7937             if (ShouldBeMemberOf) {
7938               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7939               // should be later updated with the correct value of MEMBER_OF.
7940               Flags |= OMP_MAP_MEMBER_OF;
7941               // From now on, all subsequent PTR_AND_OBJ entries should not be
7942               // marked as MEMBER_OF.
7943               ShouldBeMemberOf = false;
7944             }
7945           }
7946 
7947           CombinedInfo.Types.push_back(Flags);
7948         }
7949 
7950         // If we have encountered a member expression so far, keep track of the
7951         // mapped member. If the parent is "*this", then the value declaration
7952         // is nullptr.
7953         if (EncounteredME) {
7954           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7955           unsigned FieldIndex = FD->getFieldIndex();
7956 
7957           // Update info about the lowest and highest elements for this struct
7958           if (!PartialStruct.Base.isValid()) {
7959             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7960             if (IsFinalArraySection) {
7961               Address HB =
7962                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7963                       .getAddress(CGF);
7964               PartialStruct.HighestElem = {FieldIndex, HB};
7965             } else {
7966               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7967             }
7968             PartialStruct.Base = BP;
7969             PartialStruct.LB = BP;
7970           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7971             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7972           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7973             PartialStruct.HighestElem = {FieldIndex, LowestElem};
7974           }
7975         }
7976 
7977         // Need to emit combined struct for array sections.
7978         if (IsFinalArraySection || IsNonContiguous)
7979           PartialStruct.IsArraySection = true;
7980 
7981         // If we have a final array section, we are done with this expression.
7982         if (IsFinalArraySection)
7983           break;
7984 
7985         // The pointer becomes the base for the next element.
7986         if (Next != CE)
7987           BP = IsMemberReference ? LowestElem : LB;
7988 
7989         IsExpressionFirstInfo = false;
7990         IsCaptureFirstInfo = false;
7991         FirstPointerInComplexData = false;
7992         IsPrevMemberReference = IsMemberReference;
7993       } else if (FirstPointerInComplexData) {
7994         QualType Ty = Components.rbegin()
7995                           ->getAssociatedDeclaration()
7996                           ->getType()
7997                           .getNonReferenceType();
7998         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7999         FirstPointerInComplexData = false;
8000       }
8001     }
8002     // If ran into the whole component - allocate the space for the whole
8003     // record.
8004     if (!EncounteredME)
8005       PartialStruct.HasCompleteRecord = true;
8006 
8007     if (!IsNonContiguous)
8008       return;
8009 
8010     const ASTContext &Context = CGF.getContext();
8011 
8012     // For supporting stride in array section, we need to initialize the first
8013     // dimension size as 1, first offset as 0, and first count as 1
8014     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8015     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8016     MapValuesArrayTy CurStrides;
8017     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8018     uint64_t ElementTypeSize;
8019 
    // Collect size information for each dimension and use the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8023     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8024          Components) {
8025       const Expr *AssocExpr = Component.getAssociatedExpression();
8026       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8027 
8028       if (!OASE)
8029         continue;
8030 
8031       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8032       auto *CAT = Context.getAsConstantArrayType(Ty);
8033       auto *VAT = Context.getAsVariableArrayType(Ty);
8034 
8035       // We need all the dimension size except for the last dimension.
8036       assert((VAT || CAT || &Component == &*Components.begin()) &&
8037              "Should be either ConstantArray or VariableArray if not the "
8038              "first Component");
8039 
8040       // Get element size if CurStrides is empty.
8041       if (CurStrides.empty()) {
8042         const Type *ElementType = nullptr;
8043         if (CAT)
8044           ElementType = CAT->getElementType().getTypePtr();
8045         else if (VAT)
8046           ElementType = VAT->getElementType().getTypePtr();
8047         else
8048           assert(&Component == &*Components.begin() &&
8049                  "Only expect pointer (non CAT or VAT) when this is the "
8050                  "first Component");
8051         // If ElementType is null, then it means the base is a pointer
8052         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8053         // for next iteration.
8054         if (ElementType) {
8055           // For the case that having pointer as base, we need to remove one
8056           // level of indirection.
8057           if (&Component != &*Components.begin())
8058             ElementType = ElementType->getPointeeOrArrayElementType();
8059           ElementTypeSize =
8060               Context.getTypeSizeInChars(ElementType).getQuantity();
8061           CurStrides.push_back(
8062               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8063         }
8064       }
8065       // Get dimension value except for the last dimension since we don't need
8066       // it.
8067       if (DimSizes.size() < Components.size() - 1) {
8068         if (CAT)
8069           DimSizes.push_back(llvm::ConstantInt::get(
8070               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8071         else if (VAT)
8072           DimSizes.push_back(CGF.Builder.CreateIntCast(
8073               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8074               /*IsSigned=*/false));
8075       }
8076     }
8077 
    // Skip the dummy dimension since we already have its information.
8079     auto DI = DimSizes.begin() + 1;
8080     // Product of dimension.
8081     llvm::Value *DimProd =
8082         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8083 
8084     // Collect info for non-contiguous. Notice that offset, count, and stride
8085     // are only meaningful for array-section, so we insert a null for anything
8086     // other than array-section.
8087     // Also, the size of offset, count, and stride are not the same as
8088     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8089     // count, and stride are the same as the number of non-contiguous
8090     // declaration in target update to/from clause.
8091     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8092          Components) {
8093       const Expr *AssocExpr = Component.getAssociatedExpression();
8094 
8095       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8096         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8097             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8098             /*isSigned=*/false);
8099         CurOffsets.push_back(Offset);
8100         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8101         CurStrides.push_back(CurStrides.back());
8102         continue;
8103       }
8104 
8105       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8106 
8107       if (!OASE)
8108         continue;
8109 
8110       // Offset
8111       const Expr *OffsetExpr = OASE->getLowerBound();
8112       llvm::Value *Offset = nullptr;
8113       if (!OffsetExpr) {
8114         // If offset is absent, then we just set it to zero.
8115         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8116       } else {
8117         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8118                                            CGF.Int64Ty,
8119                                            /*isSigned=*/false);
8120       }
8121       CurOffsets.push_back(Offset);
8122 
8123       // Count
8124       const Expr *CountExpr = OASE->getLength();
8125       llvm::Value *Count = nullptr;
8126       if (!CountExpr) {
8127         // In Clang, once a high dimension is an array section, we construct all
8128         // the lower dimension as array section, however, for case like
8129         // arr[0:2][2], Clang construct the inner dimension as an array section
8130         // but it actually is not in an array section form according to spec.
8131         if (!OASE->getColonLocFirst().isValid() &&
8132             !OASE->getColonLocSecond().isValid()) {
8133           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8134         } else {
8135           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8136           // When the length is absent it defaults to ⌈(size −
8137           // lower-bound)/stride⌉, where size is the size of the array
8138           // dimension.
8139           const Expr *StrideExpr = OASE->getStride();
8140           llvm::Value *Stride =
8141               StrideExpr
8142                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8143                                               CGF.Int64Ty, /*isSigned=*/false)
8144                   : nullptr;
8145           if (Stride)
8146             Count = CGF.Builder.CreateUDiv(
8147                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8148           else
8149             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8150         }
8151       } else {
8152         Count = CGF.EmitScalarExpr(CountExpr);
8153       }
8154       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8155       CurCounts.push_back(Count);
8156 
8157       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8158       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8159       //              Offset      Count     Stride
8160       //    D0          0           1         4    (int)    <- dummy dimension
8161       //    D1          0           2         8    (2 * (1) * 4)
8162       //    D2          1           2         20   (1 * (1 * 5) * 4)
8163       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8164       const Expr *StrideExpr = OASE->getStride();
8165       llvm::Value *Stride =
8166           StrideExpr
8167               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8168                                           CGF.Int64Ty, /*isSigned=*/false)
8169               : nullptr;
8170       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8171       if (Stride)
8172         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8173       else
8174         CurStrides.push_back(DimProd);
8175       if (DI != DimSizes.end())
8176         ++DI;
8177     }
8178 
8179     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8180     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8181     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8182   }
8183 
8184   /// Return the adjusted map modifiers if the declaration a capture refers to
8185   /// appears in a first-private clause. This is expected to be used only with
8186   /// directives that start with 'target'.
8187   MappableExprsHandler::OpenMPOffloadMappingFlags
8188   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8189     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8190 
8191     // A first private variable captured by reference will use only the
8192     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8193     // declaration is known as first-private in this handler.
8194     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8195       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8196           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8197         return MappableExprsHandler::OMP_MAP_ALWAYS |
8198                MappableExprsHandler::OMP_MAP_TO;
8199       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8200         return MappableExprsHandler::OMP_MAP_TO |
8201                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8202       return MappableExprsHandler::OMP_MAP_PRIVATE |
8203              MappableExprsHandler::OMP_MAP_TO;
8204     }
8205     return MappableExprsHandler::OMP_MAP_TO |
8206            MappableExprsHandler::OMP_MAP_FROM;
8207   }
8208 
8209   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8210     // Rotate by getFlagMemberOffset() bits.
8211     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8212                                                   << getFlagMemberOffset());
8213   }
8214 
8215   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8216                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8217     // If the entry is PTR_AND_OBJ but has not been marked with the special
8218     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8219     // marked as MEMBER_OF.
8220     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8221         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8222       return;
8223 
8224     // Reset the placeholder value to prepare the flag for the assignment of the
8225     // proper MEMBER_OF value.
8226     Flags &= ~OMP_MAP_MEMBER_OF;
8227     Flags |= MemberOfFlag;
8228   }
8229 
8230   void getPlainLayout(const CXXRecordDecl *RD,
8231                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8232                       bool AsBase) const {
8233     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8234 
8235     llvm::StructType *St =
8236         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8237 
8238     unsigned NumElements = St->getNumElements();
8239     llvm::SmallVector<
8240         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8241         RecordLayout(NumElements);
8242 
8243     // Fill bases.
8244     for (const auto &I : RD->bases()) {
8245       if (I.isVirtual())
8246         continue;
8247       const auto *Base = I.getType()->getAsCXXRecordDecl();
8248       // Ignore empty bases.
8249       if (Base->isEmpty() || CGF.getContext()
8250                                  .getASTRecordLayout(Base)
8251                                  .getNonVirtualSize()
8252                                  .isZero())
8253         continue;
8254 
8255       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8256       RecordLayout[FieldIndex] = Base;
8257     }
8258     // Fill in virtual bases.
8259     for (const auto &I : RD->vbases()) {
8260       const auto *Base = I.getType()->getAsCXXRecordDecl();
8261       // Ignore empty bases.
8262       if (Base->isEmpty())
8263         continue;
8264       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8265       if (RecordLayout[FieldIndex])
8266         continue;
8267       RecordLayout[FieldIndex] = Base;
8268     }
8269     // Fill in all the fields.
8270     assert(!RD->isUnion() && "Unexpected union.");
8271     for (const auto *Field : RD->fields()) {
8272       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8273       // will fill in later.)
8274       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8275         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8276         RecordLayout[FieldIndex] = Field;
8277       }
8278     }
8279     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8280              &Data : RecordLayout) {
8281       if (Data.isNull())
8282         continue;
8283       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8284         getPlainLayout(Base, Layout, /*AsBase=*/true);
8285       else
8286         Layout.push_back(Data.get<const FieldDecl *>());
8287     }
8288   }
8289 
8290   /// Generate all the base pointers, section pointers, sizes, map types, and
8291   /// mappers for the extracted mappable expressions (all included in \a
8292   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8293   /// pair of the relevant declaration and index where it occurs is appended to
8294   /// the device pointers info array.
8295   void generateAllInfoForClauses(
8296       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8297       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8298           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8299     // We have to process the component lists that relate with the same
8300     // declaration in a single chunk so that we can generate the map flags
8301     // correctly. Therefore, we organize all lists in a map.
8302     enum MapKind { Present, Allocs, Other, Total };
8303     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8304                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8305         Info;
8306 
8307     // Helper function to fill the information map for the different supported
8308     // clauses.
8309     auto &&InfoGen =
8310         [&Info, &SkipVarSet](
8311             const ValueDecl *D, MapKind Kind,
8312             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8313             OpenMPMapClauseKind MapType,
8314             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8315             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8316             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8317             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8318           if (SkipVarSet.contains(D))
8319             return;
8320           auto It = Info.find(D);
8321           if (It == Info.end())
8322             It = Info
8323                      .insert(std::make_pair(
8324                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8325                      .first;
8326           It->second[Kind].emplace_back(
8327               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8328               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8329         };
8330 
8331     for (const auto *Cl : Clauses) {
8332       const auto *C = dyn_cast<OMPMapClause>(Cl);
8333       if (!C)
8334         continue;
8335       MapKind Kind = Other;
8336       if (!C->getMapTypeModifiers().empty() &&
8337           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8338             return K == OMPC_MAP_MODIFIER_present;
8339           }))
8340         Kind = Present;
8341       else if (C->getMapType() == OMPC_MAP_alloc)
8342         Kind = Allocs;
8343       const auto *EI = C->getVarRefs().begin();
8344       for (const auto L : C->component_lists()) {
8345         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8346         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8347                 C->getMapTypeModifiers(), llvm::None,
8348                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8349                 E);
8350         ++EI;
8351       }
8352     }
8353     for (const auto *Cl : Clauses) {
8354       const auto *C = dyn_cast<OMPToClause>(Cl);
8355       if (!C)
8356         continue;
8357       MapKind Kind = Other;
8358       if (!C->getMotionModifiers().empty() &&
8359           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8360             return K == OMPC_MOTION_MODIFIER_present;
8361           }))
8362         Kind = Present;
8363       const auto *EI = C->getVarRefs().begin();
8364       for (const auto L : C->component_lists()) {
8365         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8366                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8367                 C->isImplicit(), std::get<2>(L), *EI);
8368         ++EI;
8369       }
8370     }
8371     for (const auto *Cl : Clauses) {
8372       const auto *C = dyn_cast<OMPFromClause>(Cl);
8373       if (!C)
8374         continue;
8375       MapKind Kind = Other;
8376       if (!C->getMotionModifiers().empty() &&
8377           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8378             return K == OMPC_MOTION_MODIFIER_present;
8379           }))
8380         Kind = Present;
8381       const auto *EI = C->getVarRefs().begin();
8382       for (const auto L : C->component_lists()) {
8383         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8384                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8385                 C->isImplicit(), std::get<2>(L), *EI);
8386         ++EI;
8387       }
8388     }
8389 
8390     // Look at the use_device_ptr clause information and mark the existing map
8391     // entries as such. If there is no map information for an entry in the
8392     // use_device_ptr list, we create one with map type 'alloc' and zero size
8393     // section. It is the user fault if that was not mapped before. If there is
8394     // no map information and the pointer is a struct member, then we defer the
8395     // emission of that entry until the whole struct has been processed.
8396     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8397                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8398         DeferredInfo;
8399     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8400 
8401     for (const auto *Cl : Clauses) {
8402       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8403       if (!C)
8404         continue;
8405       for (const auto L : C->component_lists()) {
8406         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8407             std::get<1>(L);
8408         assert(!Components.empty() &&
8409                "Not expecting empty list of components!");
8410         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8411         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8412         const Expr *IE = Components.back().getAssociatedExpression();
8413         // If the first component is a member expression, we have to look into
8414         // 'this', which maps to null in the map of map information. Otherwise
8415         // look directly for the information.
8416         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8417 
8418         // We potentially have map information for this declaration already.
8419         // Look for the first set of components that refer to it.
8420         if (It != Info.end()) {
8421           bool Found = false;
8422           for (auto &Data : It->second) {
8423             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8424               return MI.Components.back().getAssociatedDeclaration() == VD;
8425             });
8426             // If we found a map entry, signal that the pointer has to be
8427             // returned and move on to the next declaration. Exclude cases where
8428             // the base pointer is mapped as array subscript, array section or
8429             // array shaping. The base address is passed as a pointer to base in
8430             // this case and cannot be used as a base for use_device_ptr list
8431             // item.
8432             if (CI != Data.end()) {
8433               auto PrevCI = std::next(CI->Components.rbegin());
8434               const auto *VarD = dyn_cast<VarDecl>(VD);
8435               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8436                   isa<MemberExpr>(IE) ||
8437                   !VD->getType().getNonReferenceType()->isPointerType() ||
8438                   PrevCI == CI->Components.rend() ||
8439                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8440                   VarD->hasLocalStorage()) {
8441                 CI->ReturnDevicePointer = true;
8442                 Found = true;
8443                 break;
8444               }
8445             }
8446           }
8447           if (Found)
8448             continue;
8449         }
8450 
8451         // We didn't find any match in our map information - generate a zero
8452         // size array section - if the pointer is a struct member we defer this
8453         // action until the whole struct has been processed.
8454         if (isa<MemberExpr>(IE)) {
8455           // Insert the pointer into Info to be processed by
8456           // generateInfoForComponentList. Because it is a member pointer
8457           // without a pointee, no entry will be generated for it, therefore
8458           // we need to generate one after the whole struct has been processed.
8459           // Nonetheless, generateInfoForComponentList must be called to take
8460           // the pointer into account for the calculation of the range of the
8461           // partial struct.
8462           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8463                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8464                   nullptr);
8465           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8466         } else {
8467           llvm::Value *Ptr =
8468               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8469           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8470           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8471           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8472           UseDevicePtrCombinedInfo.Sizes.push_back(
8473               llvm::Constant::getNullValue(CGF.Int64Ty));
8474           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8475           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8476         }
8477       }
8478     }
8479 
8480     // Look at the use_device_addr clause information and mark the existing map
8481     // entries as such. If there is no map information for an entry in the
8482     // use_device_addr list, we create one with map type 'alloc' and zero size
8483     // section. It is the user fault if that was not mapped before. If there is
8484     // no map information and the pointer is a struct member, then we defer the
8485     // emission of that entry until the whole struct has been processed.
8486     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8487     for (const auto *Cl : Clauses) {
8488       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8489       if (!C)
8490         continue;
8491       for (const auto L : C->component_lists()) {
8492         assert(!std::get<1>(L).empty() &&
8493                "Not expecting empty list of components!");
8494         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8495         if (!Processed.insert(VD).second)
8496           continue;
8497         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8498         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8499         // If the first component is a member expression, we have to look into
8500         // 'this', which maps to null in the map of map information. Otherwise
8501         // look directly for the information.
8502         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8503 
8504         // We potentially have map information for this declaration already.
8505         // Look for the first set of components that refer to it.
8506         if (It != Info.end()) {
8507           bool Found = false;
8508           for (auto &Data : It->second) {
8509             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8510               return MI.Components.back().getAssociatedDeclaration() == VD;
8511             });
8512             // If we found a map entry, signal that the pointer has to be
8513             // returned and move on to the next declaration.
8514             if (CI != Data.end()) {
8515               CI->ReturnDevicePointer = true;
8516               Found = true;
8517               break;
8518             }
8519           }
8520           if (Found)
8521             continue;
8522         }
8523 
8524         // We didn't find any match in our map information - generate a zero
8525         // size array section - if the pointer is a struct member we defer this
8526         // action until the whole struct has been processed.
8527         if (isa<MemberExpr>(IE)) {
8528           // Insert the pointer into Info to be processed by
8529           // generateInfoForComponentList. Because it is a member pointer
8530           // without a pointee, no entry will be generated for it, therefore
8531           // we need to generate one after the whole struct has been processed.
8532           // Nonetheless, generateInfoForComponentList must be called to take
8533           // the pointer into account for the calculation of the range of the
8534           // partial struct.
8535           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8536                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8537                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8538           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8539         } else {
8540           llvm::Value *Ptr;
8541           if (IE->isGLValue())
8542             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8543           else
8544             Ptr = CGF.EmitScalarExpr(IE);
8545           CombinedInfo.Exprs.push_back(VD);
8546           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8547           CombinedInfo.Pointers.push_back(Ptr);
8548           CombinedInfo.Sizes.push_back(
8549               llvm::Constant::getNullValue(CGF.Int64Ty));
8550           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8551           CombinedInfo.Mappers.push_back(nullptr);
8552         }
8553       }
8554     }
8555 
8556     for (const auto &Data : Info) {
8557       StructRangeInfoTy PartialStruct;
8558       // Temporary generated information.
8559       MapCombinedInfoTy CurInfo;
8560       const Decl *D = Data.first;
8561       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8562       for (const auto &M : Data.second) {
8563         for (const MapInfo &L : M) {
8564           assert(!L.Components.empty() &&
8565                  "Not expecting declaration with no component lists.");
8566 
8567           // Remember the current base pointer index.
8568           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8569           CurInfo.NonContigInfo.IsNonContiguous =
8570               L.Components.back().isNonContiguous();
8571           generateInfoForComponentList(
8572               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8573               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8574               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8575 
8576           // If this entry relates with a device pointer, set the relevant
8577           // declaration and add the 'return pointer' flag.
8578           if (L.ReturnDevicePointer) {
8579             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8580                    "Unexpected number of mapped base pointers.");
8581 
8582             const ValueDecl *RelevantVD =
8583                 L.Components.back().getAssociatedDeclaration();
8584             assert(RelevantVD &&
8585                    "No relevant declaration related with device pointer??");
8586 
8587             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8588                 RelevantVD);
8589             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8590           }
8591         }
8592       }
8593 
8594       // Append any pending zero-length pointers which are struct members and
8595       // used with use_device_ptr or use_device_addr.
8596       auto CI = DeferredInfo.find(Data.first);
8597       if (CI != DeferredInfo.end()) {
8598         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8599           llvm::Value *BasePtr;
8600           llvm::Value *Ptr;
8601           if (L.ForDeviceAddr) {
8602             if (L.IE->isGLValue())
8603               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8604             else
8605               Ptr = this->CGF.EmitScalarExpr(L.IE);
8606             BasePtr = Ptr;
8607             // Entry is RETURN_PARAM. Also, set the placeholder value
8608             // MEMBER_OF=FFFF so that the entry is later updated with the
8609             // correct value of MEMBER_OF.
8610             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8611           } else {
8612             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8613             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8614                                              L.IE->getExprLoc());
8615             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8616             // placeholder value MEMBER_OF=FFFF so that the entry is later
8617             // updated with the correct value of MEMBER_OF.
8618             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8619                                     OMP_MAP_MEMBER_OF);
8620           }
8621           CurInfo.Exprs.push_back(L.VD);
8622           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8623           CurInfo.Pointers.push_back(Ptr);
8624           CurInfo.Sizes.push_back(
8625               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8626           CurInfo.Mappers.push_back(nullptr);
8627         }
8628       }
8629       // If there is an entry in PartialStruct it means we have a struct with
8630       // individual members mapped. Emit an extra combined entry.
8631       if (PartialStruct.Base.isValid()) {
8632         CurInfo.NonContigInfo.Dims.push_back(0);
8633         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8634       }
8635 
8636       // We need to append the results of this capture to what we already
8637       // have.
8638       CombinedInfo.append(CurInfo);
8639     }
8640     // Append data for use_device_ptr clauses.
8641     CombinedInfo.append(UseDevicePtrCombinedInfo);
8642   }
8643 
8644 public:
8645   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8646       : CurDir(&Dir), CGF(CGF) {
8647     // Extract firstprivate clause information.
8648     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8649       for (const auto *D : C->varlists())
8650         FirstPrivateDecls.try_emplace(
8651             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8652     // Extract implicit firstprivates from uses_allocators clauses.
8653     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8654       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8655         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8656         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8657           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8658                                         /*Implicit=*/true);
8659         else if (const auto *VD = dyn_cast<VarDecl>(
8660                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8661                          ->getDecl()))
8662           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8663       }
8664     }
8665     // Extract device pointer clause information.
8666     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8667       for (auto L : C->component_lists())
8668         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8669   }
8670 
  /// Constructor for the declare mapper directive. Only the directive and the
  /// code generation function are recorded; unlike the executable-directive
  /// constructor, no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8674 
8675   /// Generate code for the combined entry if we have a partially mapped struct
8676   /// and take care of the mapping flags of the arguments corresponding to
8677   /// individual struct members.
8678   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8679                          MapFlagsArrayTy &CurTypes,
8680                          const StructRangeInfoTy &PartialStruct,
8681                          const ValueDecl *VD = nullptr,
8682                          bool NotTargetParams = true) const {
8683     if (CurTypes.size() == 1 &&
8684         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8685         !PartialStruct.IsArraySection)
8686       return;
8687     Address LBAddr = PartialStruct.LowestElem.second;
8688     Address HBAddr = PartialStruct.HighestElem.second;
8689     if (PartialStruct.HasCompleteRecord) {
8690       LBAddr = PartialStruct.LB;
8691       HBAddr = PartialStruct.LB;
8692     }
8693     CombinedInfo.Exprs.push_back(VD);
8694     // Base is the base of the struct
8695     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8696     // Pointer is the address of the lowest element
8697     llvm::Value *LB = LBAddr.getPointer();
8698     CombinedInfo.Pointers.push_back(LB);
8699     // There should not be a mapper for a combined entry.
8700     CombinedInfo.Mappers.push_back(nullptr);
8701     // Size is (addr of {highest+1} element) - (addr of lowest element)
8702     llvm::Value *HB = HBAddr.getPointer();
8703     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8704     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8705     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8706     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8707     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8708                                                   /*isSigned=*/false);
8709     CombinedInfo.Sizes.push_back(Size);
8710     // Map type is always TARGET_PARAM, if generate info for captures.
8711     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8712                                                  : OMP_MAP_TARGET_PARAM);
8713     // If any element has the present modifier, then make sure the runtime
8714     // doesn't attempt to allocate the struct.
8715     if (CurTypes.end() !=
8716         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8717           return Type & OMP_MAP_PRESENT;
8718         }))
8719       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8720     // Remove TARGET_PARAM flag from the first element
8721     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8722 
8723     // All other current entries will be MEMBER_OF the combined entry
8724     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8725     // 0xFFFF in the MEMBER_OF field).
8726     OpenMPOffloadMappingFlags MemberOfFlag =
8727         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8728     for (auto &M : CurTypes)
8729       setCorrectMemberOfFlag(M, MemberOfFlag);
8730   }
8731 
8732   /// Generate all the base pointers, section pointers, sizes, map types, and
8733   /// mappers for the extracted mappable expressions (all included in \a
8734   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8735   /// pair of the relevant declaration and index where it occurs is appended to
8736   /// the device pointers info array.
8737   void generateAllInfo(
8738       MapCombinedInfoTy &CombinedInfo,
8739       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8740           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8741     assert(CurDir.is<const OMPExecutableDirective *>() &&
8742            "Expect a executable directive");
8743     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8744     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8745   }
8746 
8747   /// Generate all the base pointers, section pointers, sizes, map types, and
8748   /// mappers for the extracted map clauses of user-defined mapper (all included
8749   /// in \a CombinedInfo).
8750   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8751     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8752            "Expect a declare mapper directive");
8753     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8754     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8755   }
8756 
8757   /// Emit capture info for lambdas for variables captured by reference.
8758   void generateInfoForLambdaCaptures(
8759       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8760       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8761     const auto *RD = VD->getType()
8762                          .getCanonicalType()
8763                          .getNonReferenceType()
8764                          ->getAsCXXRecordDecl();
8765     if (!RD || !RD->isLambda())
8766       return;
8767     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8768     LValue VDLVal = CGF.MakeAddrLValue(
8769         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8770     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8771     FieldDecl *ThisCapture = nullptr;
8772     RD->getCaptureFields(Captures, ThisCapture);
8773     if (ThisCapture) {
8774       LValue ThisLVal =
8775           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8776       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8777       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8778                                  VDLVal.getPointer(CGF));
8779       CombinedInfo.Exprs.push_back(VD);
8780       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8781       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8782       CombinedInfo.Sizes.push_back(
8783           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8784                                     CGF.Int64Ty, /*isSigned=*/true));
8785       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8786                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8787       CombinedInfo.Mappers.push_back(nullptr);
8788     }
8789     for (const LambdaCapture &LC : RD->captures()) {
8790       if (!LC.capturesVariable())
8791         continue;
8792       const VarDecl *VD = LC.getCapturedVar();
8793       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8794         continue;
8795       auto It = Captures.find(VD);
8796       assert(It != Captures.end() && "Found lambda capture without field.");
8797       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8798       if (LC.getCaptureKind() == LCK_ByRef) {
8799         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8800         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8801                                    VDLVal.getPointer(CGF));
8802         CombinedInfo.Exprs.push_back(VD);
8803         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8804         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8805         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8806             CGF.getTypeSize(
8807                 VD->getType().getCanonicalType().getNonReferenceType()),
8808             CGF.Int64Ty, /*isSigned=*/true));
8809       } else {
8810         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8811         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8812                                    VDLVal.getPointer(CGF));
8813         CombinedInfo.Exprs.push_back(VD);
8814         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8815         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8816         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8817       }
8818       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8819                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8820       CombinedInfo.Mappers.push_back(nullptr);
8821     }
8822   }
8823 
8824   /// Set correct indices for lambdas captures.
8825   void adjustMemberOfForLambdaCaptures(
8826       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8827       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8828       MapFlagsArrayTy &Types) const {
8829     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8830       // Set correct member_of idx for all implicit lambda captures.
8831       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8832                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8833         continue;
8834       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8835       assert(BasePtr && "Unable to find base lambda address.");
8836       int TgtIdx = -1;
8837       for (unsigned J = I; J > 0; --J) {
8838         unsigned Idx = J - 1;
8839         if (Pointers[Idx] != BasePtr)
8840           continue;
8841         TgtIdx = Idx;
8842         break;
8843       }
8844       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8845       // All other current entries will be MEMBER_OF the combined entry
8846       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8847       // 0xFFFF in the MEMBER_OF field).
8848       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8849       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8850     }
8851   }
8852 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap Capture to process; it must not capture a variable array type.
  /// \param Arg Value passed for the capture. It is used directly only when
  /// the captured declaration appears in an is_device_ptr clause.
  /// \param CombinedInfo Receives the generated entries.
  /// \param PartialStruct Forwarded to generateInfoForComponentList for every
  /// component list of the captured declaration.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A capture of 'this' has no associated declaration, so VD is
    // null in that case.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list that refers to VD: the components, the map
    // type and modifiers of the clause, whether the clause is implicit, the
    // mapper declaration and the mapped expression (if any).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists for VD from all map clauses of the directive.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI is advanced once per component list found for VD.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Reorder the component lists with a stable sort. As written, the
    // comparator returns true when LHS has the 'present' modifier and RHS does
    // not, or when RHS is an 'alloc' map and LHS is not. Note that HasAllocs is
    // computed from RHS and HasAllocsR from LHS.
    // NOTE(review): if LHS has both 'present' and 'alloc' while RHS has
    // neither, the comparator returns true for both (LHS, RHS) and (RHS, LHS),
    // which is not a strict weak ordering - confirm whether this is intended.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base record inside
    // DeclComponentLists, which is not modified after this point.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    // Count is the index of the element following L, so that every pair of
    // lists is compared exactly once.
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is used below; the remaining tuple fields are
        // overwritten with the values of L1 but not read in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists in reverse order for as long as the components have
        // the same statement class and the same associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points at the first extra component of the longer list.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The list that was fully consumed is the base; the other list is
          // recorded as one of its overlapped elements.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the fields of the underlying record type and is used to
    // order fields that belong to different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // NOTE(review): VD is null when the capture is 'this'; this dereference
      // is reached whenever overlapping lists were found - confirm that case
      // cannot occur for a 'this' capture.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      // Keep stripping pointer/array levels until the type no longer changes.
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common leading part of both lists, using the same walk
            // as the overlap search above.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two different fields: order by field index
            // when they share a parent, otherwise by whichever field comes
            // first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // IsFirstComponentList is true only for the first list that is processed.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists that served as a base above have already been emitted.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // The flag is cleared on every iteration, including skipped ones.
      IsFirstComponentList = false;
    }
  }
9084 
9085   /// Generate the default map information for a given capture \a CI,
9086   /// record field declaration \a RI and captured value \a CV.
9087   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9088                               const FieldDecl &RI, llvm::Value *CV,
9089                               MapCombinedInfoTy &CombinedInfo) const {
9090     bool IsImplicit = true;
9091     // Do the default mapping.
9092     if (CI.capturesThis()) {
9093       CombinedInfo.Exprs.push_back(nullptr);
9094       CombinedInfo.BasePointers.push_back(CV);
9095       CombinedInfo.Pointers.push_back(CV);
9096       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9097       CombinedInfo.Sizes.push_back(
9098           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9099                                     CGF.Int64Ty, /*isSigned=*/true));
9100       // Default map type.
9101       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9102     } else if (CI.capturesVariableByCopy()) {
9103       const VarDecl *VD = CI.getCapturedVar();
9104       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9105       CombinedInfo.BasePointers.push_back(CV);
9106       CombinedInfo.Pointers.push_back(CV);
9107       if (!RI.getType()->isAnyPointerType()) {
9108         // We have to signal to the runtime captures passed by value that are
9109         // not pointers.
9110         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9111         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9112             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9113       } else {
9114         // Pointers are implicitly mapped with a zero size and no flags
9115         // (other than first map that is added for all implicit maps).
9116         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9117         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9118       }
9119       auto I = FirstPrivateDecls.find(VD);
9120       if (I != FirstPrivateDecls.end())
9121         IsImplicit = I->getSecond();
9122     } else {
9123       assert(CI.capturesVariable() && "Expected captured reference.");
9124       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9125       QualType ElementType = PtrTy->getPointeeType();
9126       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9127           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9128       // The default map type for a scalar/complex type is 'to' because by
9129       // default the value doesn't have to be retrieved. For an aggregate
9130       // type, the default is 'tofrom'.
9131       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9132       const VarDecl *VD = CI.getCapturedVar();
9133       auto I = FirstPrivateDecls.find(VD);
9134       if (I != FirstPrivateDecls.end() &&
9135           VD->getType().isConstant(CGF.getContext())) {
9136         llvm::Constant *Addr =
9137             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9138         // Copy the value of the original variable to the new global copy.
9139         CGF.Builder.CreateMemCpy(
9140             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9141             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9142             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9143         // Use new global variable as the base pointers.
9144         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9145         CombinedInfo.BasePointers.push_back(Addr);
9146         CombinedInfo.Pointers.push_back(Addr);
9147       } else {
9148         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9149         CombinedInfo.BasePointers.push_back(CV);
9150         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9151           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9152               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9153               AlignmentSource::Decl));
9154           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9155         } else {
9156           CombinedInfo.Pointers.push_back(CV);
9157         }
9158       }
9159       if (I != FirstPrivateDecls.end())
9160         IsImplicit = I->getSecond();
9161     }
9162     // Every default map produces a single argument which is a target parameter.
9163     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9164 
9165     // Add flag stating this is an implicit map.
9166     if (IsImplicit)
9167       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9168 
9169     // No user-defined mapper for default mapping.
9170     CombinedInfo.Mappers.push_back(nullptr);
9171   }
9172 };
9173 } // anonymous namespace
9174 
9175 static void emitNonContiguousDescriptor(
9176     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9177     CGOpenMPRuntime::TargetDataInfo &Info) {
9178   CodeGenModule &CGM = CGF.CGM;
9179   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9180       &NonContigInfo = CombinedInfo.NonContigInfo;
9181 
9182   // Build an array of struct descriptor_dim and then assign it to
9183   // offload_args.
9184   //
9185   // struct descriptor_dim {
9186   //  uint64_t offset;
9187   //  uint64_t count;
9188   //  uint64_t stride
9189   // };
9190   ASTContext &C = CGF.getContext();
9191   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9192   RecordDecl *RD;
9193   RD = C.buildImplicitRecord("descriptor_dim");
9194   RD->startDefinition();
9195   addFieldToRecordDecl(C, RD, Int64Ty);
9196   addFieldToRecordDecl(C, RD, Int64Ty);
9197   addFieldToRecordDecl(C, RD, Int64Ty);
9198   RD->completeDefinition();
9199   QualType DimTy = C.getRecordType(RD);
9200 
9201   enum { OffsetFD = 0, CountFD, StrideFD };
9202   // We need two index variable here since the size of "Dims" is the same as the
9203   // size of Components, however, the size of offset, count, and stride is equal
9204   // to the size of base declaration that is non-contiguous.
9205   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9206     // Skip emitting ir if dimension size is 1 since it cannot be
9207     // non-contiguous.
9208     if (NonContigInfo.Dims[I] == 1)
9209       continue;
9210     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9211     QualType ArrayTy =
9212         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9213     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9214     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9215       unsigned RevIdx = EE - II - 1;
9216       LValue DimsLVal = CGF.MakeAddrLValue(
9217           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9218       // Offset
9219       LValue OffsetLVal = CGF.EmitLValueForField(
9220           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9221       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9222       // Count
9223       LValue CountLVal = CGF.EmitLValueForField(
9224           DimsLVal, *std::next(RD->field_begin(), CountFD));
9225       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9226       // Stride
9227       LValue StrideLVal = CGF.EmitLValueForField(
9228           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9229       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9230     }
9231     // args[I] = &dims
9232     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9233         DimsAddr, CGM.Int8PtrTy);
9234     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9235         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9236         Info.PointersArray, 0, I);
9237     Address PAddr(P, CGF.getPointerAlign());
9238     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9239     ++L;
9240   }
9241 }
9242 
9243 /// Emit a string constant containing the names of the values mapped to the
9244 /// offloading runtime library.
9245 llvm::Constant *
9246 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9247                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9248   llvm::Constant *SrcLocStr;
9249   if (!MapExprs.getMapDecl()) {
9250     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9251   } else {
9252     std::string ExprName = "";
9253     if (MapExprs.getMapExpr()) {
9254       PrintingPolicy P(CGF.getContext().getLangOpts());
9255       llvm::raw_string_ostream OS(ExprName);
9256       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9257       OS.flush();
9258     } else {
9259       ExprName = MapExprs.getMapDecl()->getNameAsString();
9260     }
9261 
9262     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9263     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9264     const char *FileName = PLoc.getFilename();
9265     unsigned Line = PLoc.getLine();
9266     unsigned Column = PLoc.getColumn();
9267     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9268                                                 Line, Column);
9269   }
9270 
9271   return SrcLocStr;
9272 }
9273 
9274 /// Emit the arrays used to pass the captures and map information to the
9275 /// offloading runtime library. If there is no map or capture information,
9276 /// return nullptr by reference.
9277 static void emitOffloadingArrays(
9278     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9279     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9280     bool IsNonContiguous = false) {
9281   CodeGenModule &CGM = CGF.CGM;
9282   ASTContext &Ctx = CGF.getContext();
9283 
9284   // Reset the array information.
9285   Info.clearArrayInfo();
9286   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9287 
9288   if (Info.NumberOfPtrs) {
9289     // Detect if we have any capture size requiring runtime evaluation of the
9290     // size so that a constant array could be eventually used.
9291     bool hasRuntimeEvaluationCaptureSize = false;
9292     for (llvm::Value *S : CombinedInfo.Sizes)
9293       if (!isa<llvm::Constant>(S)) {
9294         hasRuntimeEvaluationCaptureSize = true;
9295         break;
9296       }
9297 
9298     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9299     QualType PointerArrayType = Ctx.getConstantArrayType(
9300         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9301         /*IndexTypeQuals=*/0);
9302 
9303     Info.BasePointersArray =
9304         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9305     Info.PointersArray =
9306         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9307     Address MappersArray =
9308         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9309     Info.MappersArray = MappersArray.getPointer();
9310 
9311     // If we don't have any VLA types or other types that require runtime
9312     // evaluation, we can use a constant array for the map sizes, otherwise we
9313     // need to fill up the arrays as we do for the pointers.
9314     QualType Int64Ty =
9315         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9316     if (hasRuntimeEvaluationCaptureSize) {
9317       QualType SizeArrayType = Ctx.getConstantArrayType(
9318           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9319           /*IndexTypeQuals=*/0);
9320       Info.SizesArray =
9321           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9322     } else {
9323       // We expect all the sizes to be constant, so we collect them to create
9324       // a constant array.
9325       SmallVector<llvm::Constant *, 16> ConstSizes;
9326       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9327         if (IsNonContiguous &&
9328             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9329           ConstSizes.push_back(llvm::ConstantInt::get(
9330               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9331         } else {
9332           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9333         }
9334       }
9335 
9336       auto *SizesArrayInit = llvm::ConstantArray::get(
9337           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9338       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9339       auto *SizesArrayGbl = new llvm::GlobalVariable(
9340           CGM.getModule(), SizesArrayInit->getType(),
9341           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9342           SizesArrayInit, Name);
9343       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9344       Info.SizesArray = SizesArrayGbl;
9345     }
9346 
9347     // The map types are always constant so we don't need to generate code to
9348     // fill arrays. Instead, we create an array constant.
9349     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9350     llvm::copy(CombinedInfo.Types, Mapping.begin());
9351     llvm::Constant *MapTypesArrayInit =
9352         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9353     std::string MaptypesName =
9354         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9355     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9356         CGM.getModule(), MapTypesArrayInit->getType(),
9357         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9358         MapTypesArrayInit, MaptypesName);
9359     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9360     Info.MapTypesArray = MapTypesArrayGbl;
9361 
9362     // The information types are only built if there is debug information
9363     // requested.
9364     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9365       Info.MapNamesArray = llvm::Constant::getNullValue(
9366           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9367     } else {
9368       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9369         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9370       };
9371       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9372       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9373 
9374       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9375           llvm::ArrayType::get(
9376               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9377               CombinedInfo.Exprs.size()),
9378           InfoMap);
9379       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9380           CGM.getModule(), MapNamesArrayInit->getType(),
9381           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9382           MapNamesArrayInit,
9383           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9384       Info.MapNamesArray = MapNamesArrayGbl;
9385     }
9386 
9387     // If there's a present map type modifier, it must not be applied to the end
9388     // of a region, so generate a separate map type array in that case.
9389     if (Info.separateBeginEndCalls()) {
9390       bool EndMapTypesDiffer = false;
9391       for (uint64_t &Type : Mapping) {
9392         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9393           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9394           EndMapTypesDiffer = true;
9395         }
9396       }
9397       if (EndMapTypesDiffer) {
9398         MapTypesArrayInit =
9399             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9400         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9401         MapTypesArrayGbl = new llvm::GlobalVariable(
9402             CGM.getModule(), MapTypesArrayInit->getType(),
9403             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9404             MapTypesArrayInit, MaptypesName);
9405         MapTypesArrayGbl->setUnnamedAddr(
9406             llvm::GlobalValue::UnnamedAddr::Global);
9407         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9408       }
9409     }
9410 
9411     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9412       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9413       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9414           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9415           Info.BasePointersArray, 0, I);
9416       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9417           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9418       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9419       CGF.Builder.CreateStore(BPVal, BPAddr);
9420 
9421       if (Info.requiresDevicePointerInfo())
9422         if (const ValueDecl *DevVD =
9423                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9424           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9425 
9426       llvm::Value *PVal = CombinedInfo.Pointers[I];
9427       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9428           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9429           Info.PointersArray, 0, I);
9430       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9431           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9432       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9433       CGF.Builder.CreateStore(PVal, PAddr);
9434 
9435       if (hasRuntimeEvaluationCaptureSize) {
9436         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9437             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9438             Info.SizesArray,
9439             /*Idx0=*/0,
9440             /*Idx1=*/I);
9441         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9442         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9443                                                           CGM.Int64Ty,
9444                                                           /*isSigned=*/true),
9445                                 SAddr);
9446       }
9447 
9448       // Fill up the mapper array.
9449       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9450       if (CombinedInfo.Mappers[I]) {
9451         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9452             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9453         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9454         Info.HasMapper = true;
9455       }
9456       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9457       CGF.Builder.CreateStore(MFunc, MAddr);
9458     }
9459   }
9460 
9461   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9462       Info.NumberOfPtrs == 0)
9463     return;
9464 
9465   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9466 }
9467 
namespace {
/// Optional settings for emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When set, emitOffloadingArraysArgument passes the end-of-region map
  /// types array (if one was created) instead of the regular one.
  bool ForEndCall;

  /// Deliberately implicit so that a plain bool can be used where an
  /// ArgumentsOptions is expected; omitting the argument yields
  /// ForEndCall == false.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9476 
9477 /// Emit the arguments to be passed to the runtime library based on the
9478 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9479 /// ForEndCall, emit map types to be passed for the end of the region instead of
9480 /// the beginning.
9481 static void emitOffloadingArraysArgument(
9482     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9483     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9484     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9485     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9486     const ArgumentsOptions &Options = ArgumentsOptions()) {
9487   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9488          "expected region end call to runtime only when end call is separate");
9489   CodeGenModule &CGM = CGF.CGM;
9490   if (Info.NumberOfPtrs) {
9491     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9492         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9493         Info.BasePointersArray,
9494         /*Idx0=*/0, /*Idx1=*/0);
9495     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9496         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9497         Info.PointersArray,
9498         /*Idx0=*/0,
9499         /*Idx1=*/0);
9500     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9501         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9502         /*Idx0=*/0, /*Idx1=*/0);
9503     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9504         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9505         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9506                                                     : Info.MapTypesArray,
9507         /*Idx0=*/0,
9508         /*Idx1=*/0);
9509 
9510     // Only emit the mapper information arrays if debug information is
9511     // requested.
9512     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9513       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9514     else
9515       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9516           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9517           Info.MapNamesArray,
9518           /*Idx0=*/0,
9519           /*Idx1=*/0);
9520     // If there is no user-defined mapper, set the mapper array to nullptr to
9521     // avoid an unnecessary data privatization
9522     if (!Info.HasMapper)
9523       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9524     else
9525       MappersArrayArg =
9526           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9527   } else {
9528     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9529     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9530     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9531     MapTypesArrayArg =
9532         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9533     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9534     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9535   }
9536 }
9537 
9538 /// Check for inner distribute directive.
9539 static const OMPExecutableDirective *
9540 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9541   const auto *CS = D.getInnermostCapturedStmt();
9542   const auto *Body =
9543       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9544   const Stmt *ChildStmt =
9545       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9546 
9547   if (const auto *NestedDir =
9548           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9549     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9550     switch (D.getDirectiveKind()) {
9551     case OMPD_target:
9552       if (isOpenMPDistributeDirective(DKind))
9553         return NestedDir;
9554       if (DKind == OMPD_teams) {
9555         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9556             /*IgnoreCaptured=*/true);
9557         if (!Body)
9558           return nullptr;
9559         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9560         if (const auto *NND =
9561                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9562           DKind = NND->getDirectiveKind();
9563           if (isOpenMPDistributeDirective(DKind))
9564             return NND;
9565         }
9566       }
9567       return nullptr;
9568     case OMPD_target_teams:
9569       if (isOpenMPDistributeDirective(DKind))
9570         return NestedDir;
9571       return nullptr;
9572     case OMPD_target_parallel:
9573     case OMPD_target_simd:
9574     case OMPD_target_parallel_for:
9575     case OMPD_target_parallel_for_simd:
9576       return nullptr;
9577     case OMPD_target_teams_distribute:
9578     case OMPD_target_teams_distribute_simd:
9579     case OMPD_target_teams_distribute_parallel_for:
9580     case OMPD_target_teams_distribute_parallel_for_simd:
9581     case OMPD_parallel:
9582     case OMPD_for:
9583     case OMPD_parallel_for:
9584     case OMPD_parallel_master:
9585     case OMPD_parallel_sections:
9586     case OMPD_for_simd:
9587     case OMPD_parallel_for_simd:
9588     case OMPD_cancel:
9589     case OMPD_cancellation_point:
9590     case OMPD_ordered:
9591     case OMPD_threadprivate:
9592     case OMPD_allocate:
9593     case OMPD_task:
9594     case OMPD_simd:
9595     case OMPD_tile:
9596     case OMPD_sections:
9597     case OMPD_section:
9598     case OMPD_single:
9599     case OMPD_master:
9600     case OMPD_critical:
9601     case OMPD_taskyield:
9602     case OMPD_barrier:
9603     case OMPD_taskwait:
9604     case OMPD_taskgroup:
9605     case OMPD_atomic:
9606     case OMPD_flush:
9607     case OMPD_depobj:
9608     case OMPD_scan:
9609     case OMPD_teams:
9610     case OMPD_target_data:
9611     case OMPD_target_exit_data:
9612     case OMPD_target_enter_data:
9613     case OMPD_distribute:
9614     case OMPD_distribute_simd:
9615     case OMPD_distribute_parallel_for:
9616     case OMPD_distribute_parallel_for_simd:
9617     case OMPD_teams_distribute:
9618     case OMPD_teams_distribute_simd:
9619     case OMPD_teams_distribute_parallel_for:
9620     case OMPD_teams_distribute_parallel_for_simd:
9621     case OMPD_target_update:
9622     case OMPD_declare_simd:
9623     case OMPD_declare_variant:
9624     case OMPD_begin_declare_variant:
9625     case OMPD_end_declare_variant:
9626     case OMPD_declare_target:
9627     case OMPD_end_declare_target:
9628     case OMPD_declare_reduction:
9629     case OMPD_declare_mapper:
9630     case OMPD_taskloop:
9631     case OMPD_taskloop_simd:
9632     case OMPD_master_taskloop:
9633     case OMPD_master_taskloop_simd:
9634     case OMPD_parallel_master_taskloop:
9635     case OMPD_parallel_master_taskloop_simd:
9636     case OMPD_requires:
9637     case OMPD_unknown:
9638     default:
9639       llvm_unreachable("Unexpected directive.");
9640     }
9641   }
9642 
9643   return nullptr;
9644 }
9645 
9646 /// Emit the user-defined mapper function. The code generation follows the
9647 /// pattern in the example below.
9648 /// \code
9649 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9650 ///                                           void *base, void *begin,
9651 ///                                           int64_t size, int64_t type,
9652 ///                                           void *name = nullptr) {
9653 ///   // Allocate space for an array section first or add a base/begin for
9654 ///   // pointer dereference.
9655 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9656 ///       !maptype.IsDelete)
9657 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9658 ///                                 size*sizeof(Ty), clearToFromMember(type));
9659 ///   // Map members.
9660 ///   for (unsigned i = 0; i < size; i++) {
9661 ///     // For each component specified by this mapper:
9662 ///     for (auto c : begin[i]->all_components) {
9663 ///       if (c.hasMapper())
9664 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9665 ///                       c.arg_type, c.arg_name);
9666 ///       else
9667 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9668 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9669 ///                                     c.arg_name);
9670 ///     }
9671 ///   }
9672 ///   // Delete the array section.
9673 ///   if (size > 1 && maptype.IsDelete)
9674 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9675 ///                                 size*sizeof(Ty), clearToFromMember(type));
9676 /// }
9677 /// \endcode
9678 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9679                                             CodeGenFunction *CGF) {
9680   if (UDMMap.count(D) > 0)
9681     return;
9682   ASTContext &C = CGM.getContext();
9683   QualType Ty = D->getType();
9684   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9685   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9686   auto *MapperVarDecl =
9687       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9688   SourceLocation Loc = D->getLocation();
9689   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9690 
9691   // Prepare mapper function arguments and attributes.
9692   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9693                               C.VoidPtrTy, ImplicitParamDecl::Other);
9694   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9695                             ImplicitParamDecl::Other);
9696   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9697                              C.VoidPtrTy, ImplicitParamDecl::Other);
9698   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9699                             ImplicitParamDecl::Other);
9700   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9701                             ImplicitParamDecl::Other);
9702   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9703                             ImplicitParamDecl::Other);
9704   FunctionArgList Args;
9705   Args.push_back(&HandleArg);
9706   Args.push_back(&BaseArg);
9707   Args.push_back(&BeginArg);
9708   Args.push_back(&SizeArg);
9709   Args.push_back(&TypeArg);
9710   Args.push_back(&NameArg);
9711   const CGFunctionInfo &FnInfo =
9712       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9713   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9714   SmallString<64> TyStr;
9715   llvm::raw_svector_ostream Out(TyStr);
9716   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9717   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9718   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9719                                     Name, &CGM.getModule());
9720   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9721   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9722   // Start the mapper function code generation.
9723   CodeGenFunction MapperCGF(CGM);
9724   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9725   // Compute the starting and end addresses of array elements.
9726   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9727       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9728       C.getPointerType(Int64Ty), Loc);
9729   // Prepare common arguments for array initiation and deletion.
9730   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9731       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9732       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9733   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9734       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9735       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9736   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9737       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9738       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9739   // Convert the size in bytes into the number of array elements.
9740   Size = MapperCGF.Builder.CreateExactUDiv(
9741       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9742   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9743       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9744   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9745   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9746       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9747       C.getPointerType(Int64Ty), Loc);
9748 
9749   // Emit array initiation if this is an array section and \p MapType indicates
9750   // that memory allocation is required.
9751   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9752   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9753                              ElementSize, HeadBB, /*IsInit=*/true);
9754 
9755   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9756 
9757   // Emit the loop header block.
9758   MapperCGF.EmitBlock(HeadBB);
9759   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9760   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9761   // Evaluate whether the initial condition is satisfied.
9762   llvm::Value *IsEmpty =
9763       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9764   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9765   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9766 
9767   // Emit the loop body block.
9768   MapperCGF.EmitBlock(BodyBB);
9769   llvm::BasicBlock *LastBB = BodyBB;
9770   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9771       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9772   PtrPHI->addIncoming(PtrBegin, EntryBB);
9773   Address PtrCurrent =
9774       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9775                           .getAlignment()
9776                           .alignmentOfArrayElement(ElementSize));
9777   // Privatize the declared variable of mapper to be the current array element.
9778   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9779   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9780   (void)Scope.Privatize();
9781 
9782   // Get map clause information. Fill up the arrays with all mapped variables.
9783   MappableExprsHandler::MapCombinedInfoTy Info;
9784   MappableExprsHandler MEHandler(*D, MapperCGF);
9785   MEHandler.generateAllInfoForMapper(Info);
9786 
9787   // Call the runtime API __tgt_mapper_num_components to get the number of
9788   // pre-existing components.
9789   llvm::Value *OffloadingArgs[] = {Handle};
9790   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9791       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9792                                             OMPRTL___tgt_mapper_num_components),
9793       OffloadingArgs);
9794   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9795       PreviousSize,
9796       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9797 
9798   // Fill up the runtime mapper handle for all components.
9799   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9800     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9801         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9802     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9803         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9804     llvm::Value *CurSizeArg = Info.Sizes[I];
9805     llvm::Value *CurNameArg =
9806         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9807             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9808             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9809 
9810     // Extract the MEMBER_OF field from the map type.
9811     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9812     llvm::Value *MemberMapType =
9813         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9814 
9815     // Combine the map type inherited from user-defined mapper with that
9816     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9817     // bits of the \a MapType, which is the input argument of the mapper
9818     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9819     // bits of MemberMapType.
9820     // [OpenMP 5.0], 1.2.6. map-type decay.
9821     //        | alloc |  to   | from  | tofrom | release | delete
9822     // ----------------------------------------------------------
9823     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9824     // to     | alloc |  to   | alloc |   to   | release | delete
9825     // from   | alloc | alloc | from  |  from  | release | delete
9826     // tofrom | alloc |  to   | from  | tofrom | release | delete
9827     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9828         MapType,
9829         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9830                                    MappableExprsHandler::OMP_MAP_FROM));
9831     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9832     llvm::BasicBlock *AllocElseBB =
9833         MapperCGF.createBasicBlock("omp.type.alloc.else");
9834     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9835     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9836     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9837     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9838     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9839     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9840     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9841     MapperCGF.EmitBlock(AllocBB);
9842     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9843         MemberMapType,
9844         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9845                                      MappableExprsHandler::OMP_MAP_FROM)));
9846     MapperCGF.Builder.CreateBr(EndBB);
9847     MapperCGF.EmitBlock(AllocElseBB);
9848     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9849         LeftToFrom,
9850         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9851     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9852     // In case of to, clear OMP_MAP_FROM.
9853     MapperCGF.EmitBlock(ToBB);
9854     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9855         MemberMapType,
9856         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9857     MapperCGF.Builder.CreateBr(EndBB);
9858     MapperCGF.EmitBlock(ToElseBB);
9859     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9860         LeftToFrom,
9861         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9862     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9863     // In case of from, clear OMP_MAP_TO.
9864     MapperCGF.EmitBlock(FromBB);
9865     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9866         MemberMapType,
9867         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9868     // In case of tofrom, do nothing.
9869     MapperCGF.EmitBlock(EndBB);
9870     LastBB = EndBB;
9871     llvm::PHINode *CurMapType =
9872         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9873     CurMapType->addIncoming(AllocMapType, AllocBB);
9874     CurMapType->addIncoming(ToMapType, ToBB);
9875     CurMapType->addIncoming(FromMapType, FromBB);
9876     CurMapType->addIncoming(MemberMapType, ToElseBB);
9877 
9878     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9879                                      CurSizeArg, CurMapType, CurNameArg};
9880     if (Info.Mappers[I]) {
9881       // Call the corresponding mapper function.
9882       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9883           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9884       assert(MapperFunc && "Expect a valid mapper function is available.");
9885       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9886     } else {
9887       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9888       // data structure.
9889       MapperCGF.EmitRuntimeCall(
9890           OMPBuilder.getOrCreateRuntimeFunction(
9891               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9892           OffloadingArgs);
9893     }
9894   }
9895 
9896   // Update the pointer to point to the next element that needs to be mapped,
9897   // and check whether we have mapped all elements.
9898   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9899       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9900   PtrPHI->addIncoming(PtrNext, LastBB);
9901   llvm::Value *IsDone =
9902       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9903   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9904   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9905 
9906   MapperCGF.EmitBlock(ExitBB);
9907   // Emit array deletion if this is an array section and \p MapType indicates
9908   // that deletion is required.
9909   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9910                              ElementSize, DoneBB, /*IsInit=*/false);
9911 
9912   // Emit the function exit block.
9913   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9914   MapperCGF.FinishFunction();
9915   UDMMap.try_emplace(D, Fn);
9916   if (CGF) {
9917     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9918     Decls.second.push_back(D);
9919   }
9920 }
9921 
9922 /// Emit the array initialization or deletion portion for user-defined mapper
9923 /// code generation. First, it evaluates whether an array section is mapped and
9924 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9925 /// true, and \a MapType indicates to not delete this array, array
9926 /// initialization code is generated. If \a IsInit is false, and \a MapType
9927 /// indicates to not this array, array deletion code is generated.
9928 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9929     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9930     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9931     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9932   StringRef Prefix = IsInit ? ".init" : ".del";
9933 
9934   // Evaluate if this is an array section.
9935   llvm::BasicBlock *BodyBB =
9936       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9937   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9938       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9939   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9940       MapType,
9941       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9942   llvm::Value *DeleteCond;
9943   llvm::Value *Cond;
9944   if (IsInit) {
9945     // base != begin?
9946     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9947         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9948     // IsPtrAndObj?
9949     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9950         MapType,
9951         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9952     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9953     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9954     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9955     DeleteCond = MapperCGF.Builder.CreateIsNull(
9956         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9957   } else {
9958     Cond = IsArray;
9959     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9960         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9961   }
9962   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9963   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9964 
9965   MapperCGF.EmitBlock(BodyBB);
9966   // Get the array size by multiplying element size and element number (i.e., \p
9967   // Size).
9968   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9969       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9970   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9971   // memory allocation/deletion purpose only.
9972   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9973       MapType,
9974       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9975                                    MappableExprsHandler::OMP_MAP_FROM |
9976                                    MappableExprsHandler::OMP_MAP_MEMBER_OF)));
9977   llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9978 
9979   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9980   // data structure.
9981   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9982                                    ArraySize, MapTypeArg, MapNameArg};
9983   MapperCGF.EmitRuntimeCall(
9984       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9985                                             OMPRTL___tgt_push_mapper_component),
9986       OffloadingArgs);
9987 }
9988 
9989 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9990     const OMPDeclareMapperDecl *D) {
9991   auto I = UDMMap.find(D);
9992   if (I != UDMMap.end())
9993     return I->second;
9994   emitUserDefinedMapper(D);
9995   return UDMMap.lookup(D);
9996 }
9997 
9998 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9999     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10000     llvm::Value *DeviceID,
10001     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10002                                      const OMPLoopDirective &D)>
10003         SizeEmitter) {
10004   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10005   const OMPExecutableDirective *TD = &D;
10006   // Get nested teams distribute kind directive, if any.
10007   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10008     TD = getNestedDistributeDirective(CGM.getContext(), D);
10009   if (!TD)
10010     return;
10011   const auto *LD = cast<OMPLoopDirective>(TD);
10012   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10013                                                          PrePostActionTy &) {
10014     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10015       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10016       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10017       CGF.EmitRuntimeCall(
10018           OMPBuilder.getOrCreateRuntimeFunction(
10019               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10020           Args);
10021     }
10022   };
10023   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10024 }
10025 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// When \p OutlinedFnID is non-null, the map information of every capture of
/// the region is collected, the offloading arrays are emitted, and one of the
/// __tgt_target*_mapper runtime entry points is called; if that call returns a
/// non-zero value, the host version \p OutlinedFn is called instead. When
/// \p OutlinedFnID is null, only the host version is called. If \p IfCond is
/// given (and \p OutlinedFnID is non-null), emitIfClause selects between the
/// offloading path and the host-only path.
///
/// \param Device       Expression of the device clause (may be null) together
///                     with its modifier; with the 'ancestor' modifier the
///                     host version is called directly.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall to
///                     compute the trip count of a loop-based directive.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend or nowait clause routes the codegen below through
  // EmitOMPTargetTaskBasedDirective instead of emitInlinedDirective.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the values of the captured variables as an inlined region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      // NOTE(review): the captured variables are regenerated when an outer
      // task is required, presumably because this code is then emitted inside
      // the task function rather than the enclosing one -- confirm.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A nowait clause selects the *_nowait_mapper variants of the entry points.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams region: same arguments without the team and thread counts.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value of the runtime call selects the failure path.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collect the map information, emit the offloading arrays, record them in
  // InputInfo/MapTypesArray/MapNamesArray, and then run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures (CI) in lockstep with the fields of the captured
    // record (RI) and the captured values generated above (CV).
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a capture of 'this' is recorded as a
        // null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays to the state that ThenGen captured by
    // reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Run ElseGen (host version only), inside a task-based directive when an
  // outer task is required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10335 
10336 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10337                                                     StringRef ParentName) {
10338   if (!S)
10339     return;
10340 
10341   // Codegen OMP target directives that offload compute to the device.
10342   bool RequiresDeviceCodegen =
10343       isa<OMPExecutableDirective>(S) &&
10344       isOpenMPTargetExecutionDirective(
10345           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10346 
10347   if (RequiresDeviceCodegen) {
10348     const auto &E = *cast<OMPExecutableDirective>(S);
10349     unsigned DeviceID;
10350     unsigned FileID;
10351     unsigned Line;
10352     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10353                              FileID, Line);
10354 
10355     // Is this a target region that should not be emitted as an entry point? If
10356     // so just signal we are done with this target region.
10357     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10358                                                             ParentName, Line))
10359       return;
10360 
10361     switch (E.getDirectiveKind()) {
10362     case OMPD_target:
10363       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10364                                                    cast<OMPTargetDirective>(E));
10365       break;
10366     case OMPD_target_parallel:
10367       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10368           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10369       break;
10370     case OMPD_target_teams:
10371       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10372           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10373       break;
10374     case OMPD_target_teams_distribute:
10375       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10376           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10377       break;
10378     case OMPD_target_teams_distribute_simd:
10379       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10380           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10381       break;
10382     case OMPD_target_parallel_for:
10383       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10384           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10385       break;
10386     case OMPD_target_parallel_for_simd:
10387       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10388           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10389       break;
10390     case OMPD_target_simd:
10391       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10392           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10393       break;
10394     case OMPD_target_teams_distribute_parallel_for:
10395       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10396           CGM, ParentName,
10397           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10398       break;
10399     case OMPD_target_teams_distribute_parallel_for_simd:
10400       CodeGenFunction::
10401           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10402               CGM, ParentName,
10403               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10404       break;
10405     case OMPD_parallel:
10406     case OMPD_for:
10407     case OMPD_parallel_for:
10408     case OMPD_parallel_master:
10409     case OMPD_parallel_sections:
10410     case OMPD_for_simd:
10411     case OMPD_parallel_for_simd:
10412     case OMPD_cancel:
10413     case OMPD_cancellation_point:
10414     case OMPD_ordered:
10415     case OMPD_threadprivate:
10416     case OMPD_allocate:
10417     case OMPD_task:
10418     case OMPD_simd:
10419     case OMPD_tile:
10420     case OMPD_sections:
10421     case OMPD_section:
10422     case OMPD_single:
10423     case OMPD_master:
10424     case OMPD_critical:
10425     case OMPD_taskyield:
10426     case OMPD_barrier:
10427     case OMPD_taskwait:
10428     case OMPD_taskgroup:
10429     case OMPD_atomic:
10430     case OMPD_flush:
10431     case OMPD_depobj:
10432     case OMPD_scan:
10433     case OMPD_teams:
10434     case OMPD_target_data:
10435     case OMPD_target_exit_data:
10436     case OMPD_target_enter_data:
10437     case OMPD_distribute:
10438     case OMPD_distribute_simd:
10439     case OMPD_distribute_parallel_for:
10440     case OMPD_distribute_parallel_for_simd:
10441     case OMPD_teams_distribute:
10442     case OMPD_teams_distribute_simd:
10443     case OMPD_teams_distribute_parallel_for:
10444     case OMPD_teams_distribute_parallel_for_simd:
10445     case OMPD_target_update:
10446     case OMPD_declare_simd:
10447     case OMPD_declare_variant:
10448     case OMPD_begin_declare_variant:
10449     case OMPD_end_declare_variant:
10450     case OMPD_declare_target:
10451     case OMPD_end_declare_target:
10452     case OMPD_declare_reduction:
10453     case OMPD_declare_mapper:
10454     case OMPD_taskloop:
10455     case OMPD_taskloop_simd:
10456     case OMPD_master_taskloop:
10457     case OMPD_master_taskloop_simd:
10458     case OMPD_parallel_master_taskloop:
10459     case OMPD_parallel_master_taskloop_simd:
10460     case OMPD_requires:
10461     case OMPD_unknown:
10462     default:
10463       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10464     }
10465     return;
10466   }
10467 
10468   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10469     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10470       return;
10471 
10472     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10473     return;
10474   }
10475 
10476   // If this is a lambda function, look into its body.
10477   if (const auto *L = dyn_cast<LambdaExpr>(S))
10478     S = L->getBody();
10479 
10480   // Keep looking for target regions recursively.
10481   for (const Stmt *II : S->children())
10482     scanForTargetRegionsFunctions(II, ParentName);
10483 }
10484 
10485 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10486   // If emitting code for the host, we do not process FD here. Instead we do
10487   // the normal code generation.
10488   if (!CGM.getLangOpts().OpenMPIsDevice) {
10489     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10490       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10491           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10492       // Do not emit device_type(nohost) functions for the host.
10493       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10494         return true;
10495     }
10496     return false;
10497   }
10498 
10499   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10500   // Try to detect target regions in the function.
10501   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10502     StringRef Name = CGM.getMangledName(GD);
10503     scanForTargetRegionsFunctions(FD->getBody(), Name);
10504     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10505         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10506     // Do not emit device_type(nohost) functions for the host.
10507     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10508       return true;
10509   }
10510 
10511   // Do not to emit function if it is not marked as declare target.
10512   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10513          AlreadyEmittedTargetDecls.count(VD) == 0;
10514 }
10515 
10516 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10517   if (!CGM.getLangOpts().OpenMPIsDevice)
10518     return false;
10519 
10520   // Check if there are Ctors/Dtors in this declaration and look for target
10521   // regions in it. We use the complete variant to produce the kernel name
10522   // mangling.
10523   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10524   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10525     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10526       StringRef ParentName =
10527           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10528       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10529     }
10530     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10531       StringRef ParentName =
10532           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10533       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10534     }
10535   }
10536 
10537   // Do not to emit variable if it is not marked as declare target.
10538   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10539       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10540           cast<VarDecl>(GD.getDecl()));
10541   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10542       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10543        HasRequiresUnifiedSharedMemory)) {
10544     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10545     return true;
10546   }
10547   return false;
10548 }
10549 
10550 llvm::Constant *
10551 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10552                                                 const VarDecl *VD) {
10553   assert(VD->getType().isConstant(CGM.getContext()) &&
10554          "Expected constant variable.");
10555   StringRef VarName;
10556   llvm::Constant *Addr;
10557   llvm::GlobalValue::LinkageTypes Linkage;
10558   QualType Ty = VD->getType();
10559   SmallString<128> Buffer;
10560   {
10561     unsigned DeviceID;
10562     unsigned FileID;
10563     unsigned Line;
10564     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10565                              FileID, Line);
10566     llvm::raw_svector_ostream OS(Buffer);
10567     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10568        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10569     VarName = OS.str();
10570   }
10571   Linkage = llvm::GlobalValue::InternalLinkage;
10572   Addr =
10573       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10574                                   getDefaultFirstprivateAddressSpace());
10575   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10576   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10577   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10578   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10579       VarName, Addr, VarSize,
10580       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10581   return Addr;
10582 }
10583 
10584 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10585                                                    llvm::Constant *Addr) {
10586   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10587       !CGM.getLangOpts().OpenMPIsDevice)
10588     return;
10589   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10590       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10591   if (!Res) {
10592     if (CGM.getLangOpts().OpenMPIsDevice) {
10593       // Register non-target variables being emitted in device code (debug info
10594       // may cause this).
10595       StringRef VarName = CGM.getMangledName(VD);
10596       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10597     }
10598     return;
10599   }
10600   // Register declare target variables.
10601   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10602   StringRef VarName;
10603   CharUnits VarSize;
10604   llvm::GlobalValue::LinkageTypes Linkage;
10605 
10606   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10607       !HasRequiresUnifiedSharedMemory) {
10608     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10609     VarName = CGM.getMangledName(VD);
10610     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10611       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10612       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10613     } else {
10614       VarSize = CharUnits::Zero();
10615     }
10616     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10617     // Temp solution to prevent optimizations of the internal variables.
10618     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10619       std::string RefName = getName({VarName, "ref"});
10620       if (!CGM.GetGlobalValue(RefName)) {
10621         llvm::Constant *AddrRef =
10622             getOrCreateInternalVariable(Addr->getType(), RefName);
10623         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10624         GVAddrRef->setConstant(/*Val=*/true);
10625         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10626         GVAddrRef->setInitializer(Addr);
10627         CGM.addCompilerUsedGlobal(GVAddrRef);
10628       }
10629     }
10630   } else {
10631     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10632             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10633              HasRequiresUnifiedSharedMemory)) &&
10634            "Declare target attribute must link or to with unified memory.");
10635     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10636       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10637     else
10638       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10639 
10640     if (CGM.getLangOpts().OpenMPIsDevice) {
10641       VarName = Addr->getName();
10642       Addr = nullptr;
10643     } else {
10644       VarName = getAddrOfDeclareTargetVar(VD).getName();
10645       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10646     }
10647     VarSize = CGM.getPointerSize();
10648     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10649   }
10650 
10651   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10652       VarName, Addr, VarSize, Flags, Linkage);
10653 }
10654 
10655 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10656   if (isa<FunctionDecl>(GD.getDecl()) ||
10657       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10658     return emitTargetFunctions(GD);
10659 
10660   return emitTargetGlobalVariable(GD);
10661 }
10662 
10663 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10664   for (const VarDecl *VD : DeferredGlobalVariables) {
10665     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10666         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10667     if (!Res)
10668       continue;
10669     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10670         !HasRequiresUnifiedSharedMemory) {
10671       CGM.EmitGlobal(VD);
10672     } else {
10673       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10674               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10675                HasRequiresUnifiedSharedMemory)) &&
10676              "Expected link clause or to clause with unified memory.");
10677       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10678     }
10679   }
10680 }
10681 
/// Hook for adjusting target-specific data for lambdas used by the target
/// execution directive \p D.
///
/// This implementation emits no code: it only checks, in builds with
/// assertions enabled, that \p D is a target execution directive.
/// NOTE(review): presumably device-specific runtimes provide the real
/// implementation -- confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10687 
10688 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10689   for (const OMPClause *Clause : D->clauselists()) {
10690     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10691       HasRequiresUnifiedSharedMemory = true;
10692     } else if (const auto *AC =
10693                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10694       switch (AC->getAtomicDefaultMemOrderKind()) {
10695       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10696         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10697         break;
10698       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10699         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10700         break;
10701       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10702         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10703         break;
10704       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10705         break;
10706       }
10707     }
10708   }
10709 }
10710 
/// \returns the atomic ordering currently stored in RequiresAtomicOrdering,
/// i.e. the value last selected by processRequiresDirective for an
/// atomic_default_mem_order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10714 
10715 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10716                                                        LangAS &AS) {
10717   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10718     return false;
10719   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10720   switch(A->getAllocatorType()) {
10721   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10722   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10723   // Not supported, fallback to the default mem space.
10724   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10725   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10726   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10727   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10728   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10729   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10730   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10731     AS = LangAS::Default;
10732     return true;
10733   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10734     llvm_unreachable("Expected predefined allocator for the variables with the "
10735                      "static storage.");
10736   }
10737   return false;
10738 }
10739 
/// \returns true if a 'requires' declaration with a unified_shared_memory
/// clause has been seen by processRequiresDirective.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10743 
10744 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10745     CodeGenModule &CGM)
10746     : CGM(CGM) {
10747   if (CGM.getLangOpts().OpenMPIsDevice) {
10748     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10749     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10750   }
10751 }
10752 
10753 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10754   if (CGM.getLangOpts().OpenMPIsDevice)
10755     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10756 }
10757 
10758 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10759   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10760     return true;
10761 
10762   const auto *D = cast<FunctionDecl>(GD.getDecl());
10763   // Do not to emit function if it is marked as declare target as it was already
10764   // emitted.
10765   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10766     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10767       if (auto *F = dyn_cast_or_null<llvm::Function>(
10768               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10769         return !F->isDeclaration();
10770       return false;
10771     }
10772     return true;
10773   }
10774 
10775   return !AlreadyEmittedTargetDecls.insert(D).second;
10776 }
10777 
10778 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10779   // If we don't have entries or if we are emitting code for the device, we
10780   // don't need to do anything.
10781   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10782       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10783       (OffloadEntriesInfoManager.empty() &&
10784        !HasEmittedDeclareTargetRegion &&
10785        !HasEmittedTargetRegion))
10786     return nullptr;
10787 
10788   // Create and register the function that handles the requires directives.
10789   ASTContext &C = CGM.getContext();
10790 
10791   llvm::Function *RequiresRegFn;
10792   {
10793     CodeGenFunction CGF(CGM);
10794     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10795     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10796     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10797     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10798     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10799     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10800     // TODO: check for other requires clauses.
10801     // The requires directive takes effect only when a target region is
10802     // present in the compilation unit. Otherwise it is ignored and not
10803     // passed to the runtime. This avoids the runtime from throwing an error
10804     // for mismatching requires clauses across compilation units that don't
10805     // contain at least 1 target region.
10806     assert((HasEmittedTargetRegion ||
10807             HasEmittedDeclareTargetRegion ||
10808             !OffloadEntriesInfoManager.empty()) &&
10809            "Target or declare target region expected.");
10810     if (HasRequiresUnifiedSharedMemory)
10811       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10812     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10813                             CGM.getModule(), OMPRTL___tgt_register_requires),
10814                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10815     CGF.FinishFunction();
10816   }
10817   return RequiresRegFn;
10818 }
10819 
10820 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10821                                     const OMPExecutableDirective &D,
10822                                     SourceLocation Loc,
10823                                     llvm::Function *OutlinedFn,
10824                                     ArrayRef<llvm::Value *> CapturedVars) {
10825   if (!CGF.HaveInsertPoint())
10826     return;
10827 
10828   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10829   CodeGenFunction::RunCleanupsScope Scope(CGF);
10830 
10831   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10832   llvm::Value *Args[] = {
10833       RTLoc,
10834       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10835       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10836   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10837   RealArgs.append(std::begin(Args), std::end(Args));
10838   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10839 
10840   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10841       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10842   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10843 }
10844 
10845 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10846                                          const Expr *NumTeams,
10847                                          const Expr *ThreadLimit,
10848                                          SourceLocation Loc) {
10849   if (!CGF.HaveInsertPoint())
10850     return;
10851 
10852   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10853 
10854   llvm::Value *NumTeamsVal =
10855       NumTeams
10856           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10857                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10858           : CGF.Builder.getInt32(0);
10859 
10860   llvm::Value *ThreadLimitVal =
10861       ThreadLimit
10862           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10863                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10864           : CGF.Builder.getInt32(0);
10865 
10866   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10867   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10868                                      ThreadLimitVal};
10869   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10870                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10871                       PushNumTeamsArgs);
10872 }
10873 
10874 void CGOpenMPRuntime::emitTargetDataCalls(
10875     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10876     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10877   if (!CGF.HaveInsertPoint())
10878     return;
10879 
10880   // Action used to replace the default codegen action and turn privatization
10881   // off.
10882   PrePostActionTy NoPrivAction;
10883 
10884   // Generate the code for the opening of the data environment. Capture all the
10885   // arguments of the runtime call by reference because they are used in the
10886   // closing of the region.
10887   auto &&BeginThenGen = [this, &D, Device, &Info,
10888                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10889     // Fill up the arrays with all the mapped variables.
10890     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10891 
10892     // Get map clause information.
10893     MappableExprsHandler MEHandler(D, CGF);
10894     MEHandler.generateAllInfo(CombinedInfo);
10895 
10896     // Fill up the arrays and create the arguments.
10897     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10898                          /*IsNonContiguous=*/true);
10899 
10900     llvm::Value *BasePointersArrayArg = nullptr;
10901     llvm::Value *PointersArrayArg = nullptr;
10902     llvm::Value *SizesArrayArg = nullptr;
10903     llvm::Value *MapTypesArrayArg = nullptr;
10904     llvm::Value *MapNamesArrayArg = nullptr;
10905     llvm::Value *MappersArrayArg = nullptr;
10906     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10907                                  SizesArrayArg, MapTypesArrayArg,
10908                                  MapNamesArrayArg, MappersArrayArg, Info);
10909 
10910     // Emit device ID if any.
10911     llvm::Value *DeviceID = nullptr;
10912     if (Device) {
10913       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10914                                            CGF.Int64Ty, /*isSigned=*/true);
10915     } else {
10916       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10917     }
10918 
10919     // Emit the number of elements in the offloading arrays.
10920     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10921     //
10922     // Source location for the ident struct
10923     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10924 
10925     llvm::Value *OffloadingArgs[] = {RTLoc,
10926                                      DeviceID,
10927                                      PointerNum,
10928                                      BasePointersArrayArg,
10929                                      PointersArrayArg,
10930                                      SizesArrayArg,
10931                                      MapTypesArrayArg,
10932                                      MapNamesArrayArg,
10933                                      MappersArrayArg};
10934     CGF.EmitRuntimeCall(
10935         OMPBuilder.getOrCreateRuntimeFunction(
10936             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10937         OffloadingArgs);
10938 
10939     // If device pointer privatization is required, emit the body of the region
10940     // here. It will have to be duplicated: with and without privatization.
10941     if (!Info.CaptureDeviceAddrMap.empty())
10942       CodeGen(CGF);
10943   };
10944 
10945   // Generate code for the closing of the data region.
10946   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10947                                                 PrePostActionTy &) {
10948     assert(Info.isValid() && "Invalid data environment closing arguments.");
10949 
10950     llvm::Value *BasePointersArrayArg = nullptr;
10951     llvm::Value *PointersArrayArg = nullptr;
10952     llvm::Value *SizesArrayArg = nullptr;
10953     llvm::Value *MapTypesArrayArg = nullptr;
10954     llvm::Value *MapNamesArrayArg = nullptr;
10955     llvm::Value *MappersArrayArg = nullptr;
10956     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10957                                  SizesArrayArg, MapTypesArrayArg,
10958                                  MapNamesArrayArg, MappersArrayArg, Info,
10959                                  {/*ForEndCall=*/true});
10960 
10961     // Emit device ID if any.
10962     llvm::Value *DeviceID = nullptr;
10963     if (Device) {
10964       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10965                                            CGF.Int64Ty, /*isSigned=*/true);
10966     } else {
10967       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10968     }
10969 
10970     // Emit the number of elements in the offloading arrays.
10971     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10972 
10973     // Source location for the ident struct
10974     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10975 
10976     llvm::Value *OffloadingArgs[] = {RTLoc,
10977                                      DeviceID,
10978                                      PointerNum,
10979                                      BasePointersArrayArg,
10980                                      PointersArrayArg,
10981                                      SizesArrayArg,
10982                                      MapTypesArrayArg,
10983                                      MapNamesArrayArg,
10984                                      MappersArrayArg};
10985     CGF.EmitRuntimeCall(
10986         OMPBuilder.getOrCreateRuntimeFunction(
10987             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10988         OffloadingArgs);
10989   };
10990 
10991   // If we need device pointer privatization, we need to emit the body of the
10992   // region with no privatization in the 'else' branch of the conditional.
10993   // Otherwise, we don't have to do anything.
10994   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10995                                                          PrePostActionTy &) {
10996     if (!Info.CaptureDeviceAddrMap.empty()) {
10997       CodeGen.setAction(NoPrivAction);
10998       CodeGen(CGF);
10999     }
11000   };
11001 
11002   // We don't have to do anything to close the region if the if clause evaluates
11003   // to false.
11004   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11005 
11006   if (IfCond) {
11007     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11008   } else {
11009     RegionCodeGenTy RCG(BeginThenGen);
11010     RCG(CGF);
11011   }
11012 
11013   // If we don't require privatization of device pointers, we emit the body in
11014   // between the runtime calls. This avoids duplicating the body code.
11015   if (Info.CaptureDeviceAddrMap.empty()) {
11016     CodeGen.setAction(NoPrivAction);
11017     CodeGen(CGF);
11018   }
11019 
11020   if (IfCond) {
11021     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11022   } else {
11023     RegionCodeGenTy RCG(EndThenGen);
11024     RCG(CGF);
11025   }
11026 }
11027 
11028 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11029     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11030     const Expr *Device) {
11031   if (!CGF.HaveInsertPoint())
11032     return;
11033 
11034   assert((isa<OMPTargetEnterDataDirective>(D) ||
11035           isa<OMPTargetExitDataDirective>(D) ||
11036           isa<OMPTargetUpdateDirective>(D)) &&
11037          "Expecting either target enter, exit data, or update directives.");
11038 
11039   CodeGenFunction::OMPTargetDataInfo InputInfo;
11040   llvm::Value *MapTypesArray = nullptr;
11041   llvm::Value *MapNamesArray = nullptr;
11042   // Generate the code for the opening of the data environment.
11043   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11044                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11045     // Emit device ID if any.
11046     llvm::Value *DeviceID = nullptr;
11047     if (Device) {
11048       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11049                                            CGF.Int64Ty, /*isSigned=*/true);
11050     } else {
11051       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11052     }
11053 
11054     // Emit the number of elements in the offloading arrays.
11055     llvm::Constant *PointerNum =
11056         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11057 
11058     // Source location for the ident struct
11059     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11060 
11061     llvm::Value *OffloadingArgs[] = {RTLoc,
11062                                      DeviceID,
11063                                      PointerNum,
11064                                      InputInfo.BasePointersArray.getPointer(),
11065                                      InputInfo.PointersArray.getPointer(),
11066                                      InputInfo.SizesArray.getPointer(),
11067                                      MapTypesArray,
11068                                      MapNamesArray,
11069                                      InputInfo.MappersArray.getPointer()};
11070 
11071     // Select the right runtime function call for each standalone
11072     // directive.
11073     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11074     RuntimeFunction RTLFn;
11075     switch (D.getDirectiveKind()) {
11076     case OMPD_target_enter_data:
11077       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11078                         : OMPRTL___tgt_target_data_begin_mapper;
11079       break;
11080     case OMPD_target_exit_data:
11081       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11082                         : OMPRTL___tgt_target_data_end_mapper;
11083       break;
11084     case OMPD_target_update:
11085       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11086                         : OMPRTL___tgt_target_data_update_mapper;
11087       break;
11088     case OMPD_parallel:
11089     case OMPD_for:
11090     case OMPD_parallel_for:
11091     case OMPD_parallel_master:
11092     case OMPD_parallel_sections:
11093     case OMPD_for_simd:
11094     case OMPD_parallel_for_simd:
11095     case OMPD_cancel:
11096     case OMPD_cancellation_point:
11097     case OMPD_ordered:
11098     case OMPD_threadprivate:
11099     case OMPD_allocate:
11100     case OMPD_task:
11101     case OMPD_simd:
11102     case OMPD_tile:
11103     case OMPD_sections:
11104     case OMPD_section:
11105     case OMPD_single:
11106     case OMPD_master:
11107     case OMPD_critical:
11108     case OMPD_taskyield:
11109     case OMPD_barrier:
11110     case OMPD_taskwait:
11111     case OMPD_taskgroup:
11112     case OMPD_atomic:
11113     case OMPD_flush:
11114     case OMPD_depobj:
11115     case OMPD_scan:
11116     case OMPD_teams:
11117     case OMPD_target_data:
11118     case OMPD_distribute:
11119     case OMPD_distribute_simd:
11120     case OMPD_distribute_parallel_for:
11121     case OMPD_distribute_parallel_for_simd:
11122     case OMPD_teams_distribute:
11123     case OMPD_teams_distribute_simd:
11124     case OMPD_teams_distribute_parallel_for:
11125     case OMPD_teams_distribute_parallel_for_simd:
11126     case OMPD_declare_simd:
11127     case OMPD_declare_variant:
11128     case OMPD_begin_declare_variant:
11129     case OMPD_end_declare_variant:
11130     case OMPD_declare_target:
11131     case OMPD_end_declare_target:
11132     case OMPD_declare_reduction:
11133     case OMPD_declare_mapper:
11134     case OMPD_taskloop:
11135     case OMPD_taskloop_simd:
11136     case OMPD_master_taskloop:
11137     case OMPD_master_taskloop_simd:
11138     case OMPD_parallel_master_taskloop:
11139     case OMPD_parallel_master_taskloop_simd:
11140     case OMPD_target:
11141     case OMPD_target_simd:
11142     case OMPD_target_teams_distribute:
11143     case OMPD_target_teams_distribute_simd:
11144     case OMPD_target_teams_distribute_parallel_for:
11145     case OMPD_target_teams_distribute_parallel_for_simd:
11146     case OMPD_target_teams:
11147     case OMPD_target_parallel:
11148     case OMPD_target_parallel_for:
11149     case OMPD_target_parallel_for_simd:
11150     case OMPD_requires:
11151     case OMPD_unknown:
11152     default:
11153       llvm_unreachable("Unexpected standalone target data directive.");
11154       break;
11155     }
11156     CGF.EmitRuntimeCall(
11157         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11158         OffloadingArgs);
11159   };
11160 
11161   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11162                           &MapNamesArray](CodeGenFunction &CGF,
11163                                           PrePostActionTy &) {
11164     // Fill up the arrays with all the mapped variables.
11165     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11166 
11167     // Get map clause information.
11168     MappableExprsHandler MEHandler(D, CGF);
11169     MEHandler.generateAllInfo(CombinedInfo);
11170 
11171     TargetDataInfo Info;
11172     // Fill up the arrays and create the arguments.
11173     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11174                          /*IsNonContiguous=*/true);
11175     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11176                              D.hasClausesOfKind<OMPNowaitClause>();
11177     emitOffloadingArraysArgument(
11178         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11179         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11180         {/*ForEndTask=*/false});
11181     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11182     InputInfo.BasePointersArray =
11183         Address(Info.BasePointersArray, CGM.getPointerAlign());
11184     InputInfo.PointersArray =
11185         Address(Info.PointersArray, CGM.getPointerAlign());
11186     InputInfo.SizesArray =
11187         Address(Info.SizesArray, CGM.getPointerAlign());
11188     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11189     MapTypesArray = Info.MapTypesArray;
11190     MapNamesArray = Info.MapNamesArray;
11191     if (RequiresOuterTask)
11192       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11193     else
11194       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11195   };
11196 
11197   if (IfCond) {
11198     emitIfClause(CGF, IfCond, TargetThenGen,
11199                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11200   } else {
11201     RegionCodeGenTy ThenRCG(TargetThenGen);
11202     ThenRCG(CGF);
11203   }
11204 }
11205 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// NOTE(review): the enumerators presumably mirror the 'linear' (with a
  /// variable or constant stride) and 'uniform' clauses, with Vector for
  /// parameters named by neither -- confirm against the code that fills
  /// ParamAttrTy.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// NOTE(review): presumably the constant stride for Linear, or the
    /// position of the stride argument for LinearWithVarStride -- confirm
    /// against the users of this struct.
    llvm::APSInt StrideOrArg;
    /// NOTE(review): presumably the value of the 'aligned' clause for this
    /// parameter, if any -- confirm against the users of this struct.
    llvm::APSInt Alignment;
  };
} // namespace
11216 
11217 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11218                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11219   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11220   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11221   // of that clause. The VLEN value must be power of 2.
11222   // In other case the notion of the function`s "characteristic data type" (CDT)
11223   // is used to compute the vector length.
11224   // CDT is defined in the following order:
11225   //   a) For non-void function, the CDT is the return type.
11226   //   b) If the function has any non-uniform, non-linear parameters, then the
11227   //   CDT is the type of the first such parameter.
11228   //   c) If the CDT determined by a) or b) above is struct, union, or class
11229   //   type which is pass-by-value (except for the type that maps to the
11230   //   built-in complex data type), the characteristic data type is int.
11231   //   d) If none of the above three cases is applicable, the CDT is int.
11232   // The VLEN is then determined based on the CDT and the size of vector
11233   // register of that ISA for which current vector version is generated. The
11234   // VLEN is computed using the formula below:
11235   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11236   // where vector register size specified in section 3.2.1 Registers and the
11237   // Stack Frame of original AMD64 ABI document.
11238   QualType RetType = FD->getReturnType();
11239   if (RetType.isNull())
11240     return 0;
11241   ASTContext &C = FD->getASTContext();
11242   QualType CDT;
11243   if (!RetType.isNull() && !RetType->isVoidType()) {
11244     CDT = RetType;
11245   } else {
11246     unsigned Offset = 0;
11247     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11248       if (ParamAttrs[Offset].Kind == Vector)
11249         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11250       ++Offset;
11251     }
11252     if (CDT.isNull()) {
11253       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11254         if (ParamAttrs[I + Offset].Kind == Vector) {
11255           CDT = FD->getParamDecl(I)->getType();
11256           break;
11257         }
11258       }
11259     }
11260   }
11261   if (CDT.isNull())
11262     CDT = C.IntTy;
11263   CDT = CDT->getCanonicalTypeUnqualified();
11264   if (CDT->isRecordType() || CDT->isUnionType())
11265     CDT = C.IntTy;
11266   return C.getTypeSize(CDT);
11267 }
11268 
11269 static void
11270 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11271                            const llvm::APSInt &VLENVal,
11272                            ArrayRef<ParamAttrTy> ParamAttrs,
11273                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11274   struct ISADataTy {
11275     char ISA;
11276     unsigned VecRegSize;
11277   };
11278   ISADataTy ISAData[] = {
11279       {
11280           'b', 128
11281       }, // SSE
11282       {
11283           'c', 256
11284       }, // AVX
11285       {
11286           'd', 256
11287       }, // AVX2
11288       {
11289           'e', 512
11290       }, // AVX512
11291   };
11292   llvm::SmallVector<char, 2> Masked;
11293   switch (State) {
11294   case OMPDeclareSimdDeclAttr::BS_Undefined:
11295     Masked.push_back('N');
11296     Masked.push_back('M');
11297     break;
11298   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11299     Masked.push_back('N');
11300     break;
11301   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11302     Masked.push_back('M');
11303     break;
11304   }
11305   for (char Mask : Masked) {
11306     for (const ISADataTy &Data : ISAData) {
11307       SmallString<256> Buffer;
11308       llvm::raw_svector_ostream Out(Buffer);
11309       Out << "_ZGV" << Data.ISA << Mask;
11310       if (!VLENVal) {
11311         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11312         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11313         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11314       } else {
11315         Out << VLENVal;
11316       }
11317       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11318         switch (ParamAttr.Kind){
11319         case LinearWithVarStride:
11320           Out << 's' << ParamAttr.StrideOrArg;
11321           break;
11322         case Linear:
11323           Out << 'l';
11324           if (ParamAttr.StrideOrArg != 1)
11325             Out << ParamAttr.StrideOrArg;
11326           break;
11327         case Uniform:
11328           Out << 'u';
11329           break;
11330         case Vector:
11331           Out << 'v';
11332           break;
11333         }
11334         if (!!ParamAttr.Alignment)
11335           Out << 'a' << ParamAttr.Alignment;
11336       }
11337       Out << '_' << Fn->getName();
11338       Fn->addFnAttr(Out.str());
11339     }
11340   }
11341 }
11342 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11348 
11349 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11350 ///
11351 /// TODO: Need to implement the behavior for reference marked with a
11352 /// var or no linear modifiers (1.b in the section). For this, we
11353 /// need to extend ParamKindTy to support the linear modifiers.
11354 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11355   QT = QT.getCanonicalType();
11356 
11357   if (QT->isVoidType())
11358     return false;
11359 
11360   if (Kind == ParamKindTy::Uniform)
11361     return false;
11362 
11363   if (Kind == ParamKindTy::Linear)
11364     return false;
11365 
11366   // TODO: Handle linear references with modifiers
11367 
11368   if (Kind == ParamKindTy::LinearWithVarStride)
11369     return false;
11370 
11371   return true;
11372 }
11373 
11374 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11375 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11376   QT = QT.getCanonicalType();
11377   unsigned Size = C.getTypeSize(QT);
11378 
11379   // Only scalars and complex within 16 bytes wide set PVB to true.
11380   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11381     return false;
11382 
11383   if (QT->isFloatingType())
11384     return true;
11385 
11386   if (QT->isIntegerType())
11387     return true;
11388 
11389   if (QT->isPointerType())
11390     return true;
11391 
11392   // TODO: Add support for complex types (section 3.1.2, item 2).
11393 
11394   return false;
11395 }
11396 
11397 /// Computes the lane size (LS) of a return type or of an input parameter,
11398 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11399 /// TODO: Add support for references, section 3.2.1, item 1.
11400 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11401   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11402     QualType PTy = QT.getCanonicalType()->getPointeeType();
11403     if (getAArch64PBV(PTy, C))
11404       return C.getTypeSize(PTy);
11405   }
11406   if (getAArch64PBV(QT, C))
11407     return C.getTypeSize(QT);
11408 
11409   return C.getTypeSize(C.getUIntPtrType());
11410 }
11411 
11412 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11413 // signature of the scalar function, as defined in 3.2.2 of the
11414 // AAVFABI.
11415 static std::tuple<unsigned, unsigned, bool>
11416 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11417   QualType RetType = FD->getReturnType().getCanonicalType();
11418 
11419   ASTContext &C = FD->getASTContext();
11420 
11421   bool OutputBecomesInput = false;
11422 
11423   llvm::SmallVector<unsigned, 8> Sizes;
11424   if (!RetType->isVoidType()) {
11425     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11426     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11427       OutputBecomesInput = true;
11428   }
11429   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11430     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11431     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11432   }
11433 
11434   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11435   // The LS of a function parameter / return value can only be a power
11436   // of 2, starting from 8 bits, up to 128.
11437   assert(std::all_of(Sizes.begin(), Sizes.end(),
11438                      [](unsigned Size) {
11439                        return Size == 8 || Size == 16 || Size == 32 ||
11440                               Size == 64 || Size == 128;
11441                      }) &&
11442          "Invalid size");
11443 
11444   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11445                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11446                          OutputBecomesInput);
11447 }
11448 
11449 /// Mangle the parameter part of the vector function name according to
11450 /// their OpenMP classification. The mangling function is defined in
11451 /// section 3.5 of the AAVFABI.
11452 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11453   SmallString<256> Buffer;
11454   llvm::raw_svector_ostream Out(Buffer);
11455   for (const auto &ParamAttr : ParamAttrs) {
11456     switch (ParamAttr.Kind) {
11457     case LinearWithVarStride:
11458       Out << "ls" << ParamAttr.StrideOrArg;
11459       break;
11460     case Linear:
11461       Out << 'l';
11462       // Don't print the step value if it is not present or if it is
11463       // equal to 1.
11464       if (ParamAttr.StrideOrArg != 1)
11465         Out << ParamAttr.StrideOrArg;
11466       break;
11467     case Uniform:
11468       Out << 'u';
11469       break;
11470     case Vector:
11471       Out << 'v';
11472       break;
11473     }
11474 
11475     if (!!ParamAttr.Alignment)
11476       Out << 'a' << ParamAttr.Alignment;
11477   }
11478 
11479   return std::string(Out.str());
11480 }
11481 
11482 // Function used to add the attribute. The parameter `VLEN` is
11483 // templated to allow the use of "x" when targeting scalable functions
11484 // for SVE.
11485 template <typename T>
11486 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11487                                  char ISA, StringRef ParSeq,
11488                                  StringRef MangledName, bool OutputBecomesInput,
11489                                  llvm::Function *Fn) {
11490   SmallString<256> Buffer;
11491   llvm::raw_svector_ostream Out(Buffer);
11492   Out << Prefix << ISA << LMask << VLEN;
11493   if (OutputBecomesInput)
11494     Out << "v";
11495   Out << ParSeq << "_" << MangledName;
11496   Fn->addFnAttr(Out.str());
11497 }
11498 
11499 // Helper function to generate the Advanced SIMD names depending on
11500 // the value of the NDS when simdlen is not present.
11501 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11502                                       StringRef Prefix, char ISA,
11503                                       StringRef ParSeq, StringRef MangledName,
11504                                       bool OutputBecomesInput,
11505                                       llvm::Function *Fn) {
11506   switch (NDS) {
11507   case 8:
11508     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11509                          OutputBecomesInput, Fn);
11510     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11511                          OutputBecomesInput, Fn);
11512     break;
11513   case 16:
11514     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11515                          OutputBecomesInput, Fn);
11516     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11517                          OutputBecomesInput, Fn);
11518     break;
11519   case 32:
11520     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11521                          OutputBecomesInput, Fn);
11522     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11523                          OutputBecomesInput, Fn);
11524     break;
11525   case 64:
11526   case 128:
11527     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11528                          OutputBecomesInput, Fn);
11529     break;
11530   default:
11531     llvm_unreachable("Scalar type is too wide.");
11532   }
11533 }
11534 
11535 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11536 static void emitAArch64DeclareSimdFunction(
11537     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11538     ArrayRef<ParamAttrTy> ParamAttrs,
11539     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11540     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11541 
11542   // Get basic data for building the vector signature.
11543   const auto Data = getNDSWDS(FD, ParamAttrs);
11544   const unsigned NDS = std::get<0>(Data);
11545   const unsigned WDS = std::get<1>(Data);
11546   const bool OutputBecomesInput = std::get<2>(Data);
11547 
11548   // Check the values provided via `simdlen` by the user.
11549   // 1. A `simdlen(1)` doesn't produce vector signatures,
11550   if (UserVLEN == 1) {
11551     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11552         DiagnosticsEngine::Warning,
11553         "The clause simdlen(1) has no effect when targeting aarch64.");
11554     CGM.getDiags().Report(SLoc, DiagID);
11555     return;
11556   }
11557 
11558   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11559   // Advanced SIMD output.
11560   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11561     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11562         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11563                                     "power of 2 when targeting Advanced SIMD.");
11564     CGM.getDiags().Report(SLoc, DiagID);
11565     return;
11566   }
11567 
11568   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11569   // limits.
11570   if (ISA == 's' && UserVLEN != 0) {
11571     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11572       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11573           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11574                                       "lanes in the architectural constraints "
11575                                       "for SVE (min is 128-bit, max is "
11576                                       "2048-bit, by steps of 128-bit)");
11577       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11578       return;
11579     }
11580   }
11581 
11582   // Sort out parameter sequence.
11583   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11584   StringRef Prefix = "_ZGV";
11585   // Generate simdlen from user input (if any).
11586   if (UserVLEN) {
11587     if (ISA == 's') {
11588       // SVE generates only a masked function.
11589       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11590                            OutputBecomesInput, Fn);
11591     } else {
11592       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11593       // Advanced SIMD generates one or two functions, depending on
11594       // the `[not]inbranch` clause.
11595       switch (State) {
11596       case OMPDeclareSimdDeclAttr::BS_Undefined:
11597         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11598                              OutputBecomesInput, Fn);
11599         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11600                              OutputBecomesInput, Fn);
11601         break;
11602       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11603         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11604                              OutputBecomesInput, Fn);
11605         break;
11606       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11607         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11608                              OutputBecomesInput, Fn);
11609         break;
11610       }
11611     }
11612   } else {
11613     // If no user simdlen is provided, follow the AAVFABI rules for
11614     // generating the vector length.
11615     if (ISA == 's') {
11616       // SVE, section 3.4.1, item 1.
11617       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11618                            OutputBecomesInput, Fn);
11619     } else {
11620       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11621       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11622       // two vector names depending on the use of the clause
11623       // `[not]inbranch`.
11624       switch (State) {
11625       case OMPDeclareSimdDeclAttr::BS_Undefined:
11626         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11627                                   OutputBecomesInput, Fn);
11628         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11629                                   OutputBecomesInput, Fn);
11630         break;
11631       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11632         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11633                                   OutputBecomesInput, Fn);
11634         break;
11635       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11636         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11637                                   OutputBecomesInput, Fn);
11638         break;
11639       }
11640     }
11641   }
11642 }
11643 
11644 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11645                                               llvm::Function *Fn) {
11646   ASTContext &C = CGM.getContext();
11647   FD = FD->getMostRecentDecl();
11648   // Map params to their positions in function decl.
11649   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11650   if (isa<CXXMethodDecl>(FD))
11651     ParamPositions.try_emplace(FD, 0);
11652   unsigned ParamPos = ParamPositions.size();
11653   for (const ParmVarDecl *P : FD->parameters()) {
11654     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11655     ++ParamPos;
11656   }
11657   while (FD) {
11658     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11659       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11660       // Mark uniform parameters.
11661       for (const Expr *E : Attr->uniforms()) {
11662         E = E->IgnoreParenImpCasts();
11663         unsigned Pos;
11664         if (isa<CXXThisExpr>(E)) {
11665           Pos = ParamPositions[FD];
11666         } else {
11667           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11668                                 ->getCanonicalDecl();
11669           Pos = ParamPositions[PVD];
11670         }
11671         ParamAttrs[Pos].Kind = Uniform;
11672       }
11673       // Get alignment info.
11674       auto NI = Attr->alignments_begin();
11675       for (const Expr *E : Attr->aligneds()) {
11676         E = E->IgnoreParenImpCasts();
11677         unsigned Pos;
11678         QualType ParmTy;
11679         if (isa<CXXThisExpr>(E)) {
11680           Pos = ParamPositions[FD];
11681           ParmTy = E->getType();
11682         } else {
11683           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11684                                 ->getCanonicalDecl();
11685           Pos = ParamPositions[PVD];
11686           ParmTy = PVD->getType();
11687         }
11688         ParamAttrs[Pos].Alignment =
11689             (*NI)
11690                 ? (*NI)->EvaluateKnownConstInt(C)
11691                 : llvm::APSInt::getUnsigned(
11692                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11693                           .getQuantity());
11694         ++NI;
11695       }
11696       // Mark linear parameters.
11697       auto SI = Attr->steps_begin();
11698       auto MI = Attr->modifiers_begin();
11699       for (const Expr *E : Attr->linears()) {
11700         E = E->IgnoreParenImpCasts();
11701         unsigned Pos;
11702         // Rescaling factor needed to compute the linear parameter
11703         // value in the mangled name.
11704         unsigned PtrRescalingFactor = 1;
11705         if (isa<CXXThisExpr>(E)) {
11706           Pos = ParamPositions[FD];
11707         } else {
11708           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11709                                 ->getCanonicalDecl();
11710           Pos = ParamPositions[PVD];
11711           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11712             PtrRescalingFactor = CGM.getContext()
11713                                      .getTypeSizeInChars(P->getPointeeType())
11714                                      .getQuantity();
11715         }
11716         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11717         ParamAttr.Kind = Linear;
11718         // Assuming a stride of 1, for `linear` without modifiers.
11719         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11720         if (*SI) {
11721           Expr::EvalResult Result;
11722           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11723             if (const auto *DRE =
11724                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11725               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11726                 ParamAttr.Kind = LinearWithVarStride;
11727                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11728                     ParamPositions[StridePVD->getCanonicalDecl()]);
11729               }
11730             }
11731           } else {
11732             ParamAttr.StrideOrArg = Result.Val.getInt();
11733           }
11734         }
11735         // If we are using a linear clause on a pointer, we need to
11736         // rescale the value of linear_step with the byte size of the
11737         // pointee type.
11738         if (Linear == ParamAttr.Kind)
11739           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11740         ++SI;
11741         ++MI;
11742       }
11743       llvm::APSInt VLENVal;
11744       SourceLocation ExprLoc;
11745       const Expr *VLENExpr = Attr->getSimdlen();
11746       if (VLENExpr) {
11747         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11748         ExprLoc = VLENExpr->getExprLoc();
11749       }
11750       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11751       if (CGM.getTriple().isX86()) {
11752         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11753       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11754         unsigned VLEN = VLENVal.getExtValue();
11755         StringRef MangledName = Fn->getName();
11756         if (CGM.getTarget().hasFeature("sve"))
11757           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11758                                          MangledName, 's', 128, Fn, ExprLoc);
11759         if (CGM.getTarget().hasFeature("neon"))
11760           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11761                                          MangledName, 'n', 128, Fn, ExprLoc);
11762       }
11763     }
11764     FD = FD->getPreviousDecl();
11765   }
11766 }
11767 
11768 namespace {
11769 /// Cleanup action for doacross support.
11770 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11771 public:
11772   static const int DoacrossFinArgs = 2;
11773 
11774 private:
11775   llvm::FunctionCallee RTLFn;
11776   llvm::Value *Args[DoacrossFinArgs];
11777 
11778 public:
11779   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11780                     ArrayRef<llvm::Value *> CallArgs)
11781       : RTLFn(RTLFn) {
11782     assert(CallArgs.size() == DoacrossFinArgs);
11783     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11784   }
11785   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11786     if (!CGF.HaveInsertPoint())
11787       return;
11788     CGF.EmitRuntimeCall(RTLFn, Args);
11789   }
11790 };
11791 } // namespace
11792 
11793 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11794                                        const OMPLoopDirective &D,
11795                                        ArrayRef<Expr *> NumIterations) {
11796   if (!CGF.HaveInsertPoint())
11797     return;
11798 
11799   ASTContext &C = CGM.getContext();
11800   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11801   RecordDecl *RD;
11802   if (KmpDimTy.isNull()) {
11803     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11804     //  kmp_int64 lo; // lower
11805     //  kmp_int64 up; // upper
11806     //  kmp_int64 st; // stride
11807     // };
11808     RD = C.buildImplicitRecord("kmp_dim");
11809     RD->startDefinition();
11810     addFieldToRecordDecl(C, RD, Int64Ty);
11811     addFieldToRecordDecl(C, RD, Int64Ty);
11812     addFieldToRecordDecl(C, RD, Int64Ty);
11813     RD->completeDefinition();
11814     KmpDimTy = C.getRecordType(RD);
11815   } else {
11816     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11817   }
11818   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11819   QualType ArrayTy =
11820       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11821 
11822   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11823   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11824   enum { LowerFD = 0, UpperFD, StrideFD };
11825   // Fill dims with data.
11826   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11827     LValue DimsLVal = CGF.MakeAddrLValue(
11828         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11829     // dims.upper = num_iterations;
11830     LValue UpperLVal = CGF.EmitLValueForField(
11831         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11832     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11833         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11834         Int64Ty, NumIterations[I]->getExprLoc());
11835     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11836     // dims.stride = 1;
11837     LValue StrideLVal = CGF.EmitLValueForField(
11838         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11839     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11840                           StrideLVal);
11841   }
11842 
11843   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11844   // kmp_int32 num_dims, struct kmp_dim * dims);
11845   llvm::Value *Args[] = {
11846       emitUpdateLocation(CGF, D.getBeginLoc()),
11847       getThreadID(CGF, D.getBeginLoc()),
11848       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11849       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11850           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11851           CGM.VoidPtrTy)};
11852 
11853   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11854       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11855   CGF.EmitRuntimeCall(RTLFn, Args);
11856   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11857       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11858   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11859       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11860   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11861                                              llvm::makeArrayRef(FiniArgs));
11862 }
11863 
11864 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11865                                           const OMPDependClause *C) {
11866   QualType Int64Ty =
11867       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11868   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11869   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11870       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11871   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11872   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11873     const Expr *CounterVal = C->getLoopData(I);
11874     assert(CounterVal);
11875     llvm::Value *CntVal = CGF.EmitScalarConversion(
11876         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11877         CounterVal->getExprLoc());
11878     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11879                           /*Volatile=*/false, Int64Ty);
11880   }
11881   llvm::Value *Args[] = {
11882       emitUpdateLocation(CGF, C->getBeginLoc()),
11883       getThreadID(CGF, C->getBeginLoc()),
11884       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11885   llvm::FunctionCallee RTLFn;
11886   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11887     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11888                                                   OMPRTL___kmpc_doacross_post);
11889   } else {
11890     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11891     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11892                                                   OMPRTL___kmpc_doacross_wait);
11893   }
11894   CGF.EmitRuntimeCall(RTLFn, Args);
11895 }
11896 
11897 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11898                                llvm::FunctionCallee Callee,
11899                                ArrayRef<llvm::Value *> Args) const {
11900   assert(Loc.isValid() && "Outlined function call location must be valid.");
11901   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11902 
11903   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11904     if (Fn->doesNotThrow()) {
11905       CGF.EmitNounwindRuntimeCall(Fn, Args);
11906       return;
11907     }
11908   }
11909   CGF.EmitRuntimeCall(Callee, Args);
11910 }
11911 
11912 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11913     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11914     ArrayRef<llvm::Value *> Args) const {
11915   emitCall(CGF, Loc, OutlinedFn, Args);
11916 }
11917 
11918 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11919   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11920     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11921       HasEmittedDeclareTargetRegion = true;
11922 }
11923 
11924 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11925                                              const VarDecl *NativeParam,
11926                                              const VarDecl *TargetParam) const {
11927   return CGF.GetAddrOfLocalVar(NativeParam);
11928 }
11929 
/// Computes the address to use for local variable \p VD when OpenMP requires
/// special storage for it.
///
/// Two sources are consulted:
///  - the untied-task map registered for the current function, which may hold
///    a pair of addresses for \p VD;
///  - an OMPAllocateDeclAttr on the canonical declaration, in which case the
///    storage is obtained with __kmpc_alloc and a cleanup calling __kmpc_free
///    is pushed.
///
/// \returns Address::invalid() if \p VD is null. If the declaration has no
/// allocate attribute, or isAllocatableDecl() rejects it, the untied address
/// is returned (which is itself invalid when no untied data was found).
/// Otherwise returns the address of the allocated storage.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up the untied-task data (if any) recorded for the current function.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      // first: slot that receives the allocated pointer (see the store below);
      // second: address handed back to the caller and to the cleanup.
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Size is only known at run time: round it up with emitted arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Size is a compile-time constant: round it up here.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Emit the __kmpc_alloc(ThreadID, Size, Allocator) call.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When untied data exists, store the allocated pointer into its slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(ThreadID, (void *)Addr, Allocator); the allocator
      // expression is re-evaluated at the cleanup point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied "real" address when present; otherwise wrap the
    // freshly allocated pointer with the declaration's alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // With untied data, let the enclosing OpenMP region (if any) emit its
    // untied switch after the allocation.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12032 
12033 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12034                                              const VarDecl *VD) const {
12035   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12036   if (It == FunctionToUntiedTaskStackMap.end())
12037     return false;
12038   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12039 }
12040 
12041 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12042     CodeGenModule &CGM, const OMPLoopDirective &S)
12043     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12044   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12045   if (!NeedToPush)
12046     return;
12047   NontemporalDeclsSet &DS =
12048       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12049   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12050     for (const Stmt *Ref : C->private_refs()) {
12051       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12052       const ValueDecl *VD;
12053       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12054         VD = DRE->getDecl();
12055       } else {
12056         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12057         assert((ME->isImplicitCXXThis() ||
12058                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12059                "Expected member of current class.");
12060         VD = ME->getMemberDecl();
12061       }
12062       DS.insert(VD);
12063     }
12064   }
12065 }
12066 
12067 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12068   if (!NeedToPush)
12069     return;
12070   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12071 }
12072 
12073 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12074     CodeGenFunction &CGF,
12075     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
12076                          std::pair<Address, Address>> &LocalVars)
12077     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12078   if (!NeedToPush)
12079     return;
12080   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12081       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12082   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12083 }
12084 
12085 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12086   if (!NeedToPush)
12087     return;
12088   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12089 }
12090 
12091 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12092   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12093 
12094   return llvm::any_of(
12095       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12096       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12097 }
12098 
12099 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12100     const OMPExecutableDirective &S,
12101     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12102     const {
12103   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12104   // Vars in target/task regions must be excluded completely.
12105   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12106       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12107     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12108     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12109     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12110     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12111       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12112         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12113     }
12114   }
12115   // Exclude vars in private clauses.
12116   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12117     for (const Expr *Ref : C->varlists()) {
12118       if (!Ref->getType()->isScalarType())
12119         continue;
12120       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12121       if (!DRE)
12122         continue;
12123       NeedToCheckForLPCs.insert(DRE->getDecl());
12124     }
12125   }
12126   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12127     for (const Expr *Ref : C->varlists()) {
12128       if (!Ref->getType()->isScalarType())
12129         continue;
12130       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12131       if (!DRE)
12132         continue;
12133       NeedToCheckForLPCs.insert(DRE->getDecl());
12134     }
12135   }
12136   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12137     for (const Expr *Ref : C->varlists()) {
12138       if (!Ref->getType()->isScalarType())
12139         continue;
12140       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12141       if (!DRE)
12142         continue;
12143       NeedToCheckForLPCs.insert(DRE->getDecl());
12144     }
12145   }
12146   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12147     for (const Expr *Ref : C->varlists()) {
12148       if (!Ref->getType()->isScalarType())
12149         continue;
12150       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12151       if (!DRE)
12152         continue;
12153       NeedToCheckForLPCs.insert(DRE->getDecl());
12154     }
12155   }
12156   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12157     for (const Expr *Ref : C->varlists()) {
12158       if (!Ref->getType()->isScalarType())
12159         continue;
12160       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12161       if (!DRE)
12162         continue;
12163       NeedToCheckForLPCs.insert(DRE->getDecl());
12164     }
12165   }
12166   for (const Decl *VD : NeedToCheckForLPCs) {
12167     for (const LastprivateConditionalData &Data :
12168          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12169       if (Data.DeclToUniqueName.count(VD) > 0) {
12170         if (!Data.Disabled)
12171           NeedToAddForLPCsAsDisabled.insert(VD);
12172         break;
12173       }
12174     }
12175   }
12176 }
12177 
12178 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12179     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12180     : CGM(CGF.CGM),
12181       Action((CGM.getLangOpts().OpenMP >= 50 &&
12182               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12183                            [](const OMPLastprivateClause *C) {
12184                              return C->getKind() ==
12185                                     OMPC_LASTPRIVATE_conditional;
12186                            }))
12187                  ? ActionToDo::PushAsLastprivateConditional
12188                  : ActionToDo::DoNotPush) {
12189   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12190   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12191     return;
12192   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12193          "Expected a push action.");
12194   LastprivateConditionalData &Data =
12195       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12196   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12197     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12198       continue;
12199 
12200     for (const Expr *Ref : C->varlists()) {
12201       Data.DeclToUniqueName.insert(std::make_pair(
12202           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12203           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12204     }
12205   }
12206   Data.IVLVal = IVLVal;
12207   Data.Fn = CGF.CurFn;
12208 }
12209 
12210 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12211     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12212     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12213   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12214   if (CGM.getLangOpts().OpenMP < 50)
12215     return;
12216   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12217   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12218   if (!NeedToAddForLPCsAsDisabled.empty()) {
12219     Action = ActionToDo::DisableLastprivateConditional;
12220     LastprivateConditionalData &Data =
12221         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12222     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12223       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12224     Data.Fn = CGF.CurFn;
12225     Data.Disabled = true;
12226   }
12227 }
12228 
/// Factory for the "disable" flavour: returns an RAII object built with the
/// two-argument (CGF, S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12234 
12235 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12236   if (CGM.getLangOpts().OpenMP < 50)
12237     return;
12238   if (Action == ActionToDo::DisableLastprivateConditional) {
12239     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12240            "Expected list of disabled private vars.");
12241     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12242   }
12243   if (Action == ActionToDo::PushAsLastprivateConditional) {
12244     assert(
12245         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12246         "Expected list of lastprivate conditional vars.");
12247     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12248   }
12249 }
12250 
12251 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12252                                                         const VarDecl *VD) {
12253   ASTContext &C = CGM.getContext();
12254   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12255   if (I == LastprivateConditionalToTypes.end())
12256     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12257   QualType NewType;
12258   const FieldDecl *VDField;
12259   const FieldDecl *FiredField;
12260   LValue BaseLVal;
12261   auto VI = I->getSecond().find(VD);
12262   if (VI == I->getSecond().end()) {
12263     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12264     RD->startDefinition();
12265     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12266     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12267     RD->completeDefinition();
12268     NewType = C.getRecordType(RD);
12269     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12270     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12271     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12272   } else {
12273     NewType = std::get<0>(VI->getSecond());
12274     VDField = std::get<1>(VI->getSecond());
12275     FiredField = std::get<2>(VI->getSecond());
12276     BaseLVal = std::get<3>(VI->getSecond());
12277   }
12278   LValue FiredLVal =
12279       CGF.EmitLValueForField(BaseLVal, FiredField);
12280   CGF.EmitStoreOfScalar(
12281       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12282       FiredLVal);
12283   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12284 }
12285 
12286 namespace {
12287 /// Checks if the lastprivate conditional variable is referenced in LHS.
12288 class LastprivateConditionalRefChecker final
12289     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12290   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12291   const Expr *FoundE = nullptr;
12292   const Decl *FoundD = nullptr;
12293   StringRef UniqueDeclName;
12294   LValue IVLVal;
12295   llvm::Function *FoundFn = nullptr;
12296   SourceLocation Loc;
12297 
12298 public:
12299   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12300     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12301          llvm::reverse(LPM)) {
12302       auto It = D.DeclToUniqueName.find(E->getDecl());
12303       if (It == D.DeclToUniqueName.end())
12304         continue;
12305       if (D.Disabled)
12306         return false;
12307       FoundE = E;
12308       FoundD = E->getDecl()->getCanonicalDecl();
12309       UniqueDeclName = It->second;
12310       IVLVal = D.IVLVal;
12311       FoundFn = D.Fn;
12312       break;
12313     }
12314     return FoundE == E;
12315   }
12316   bool VisitMemberExpr(const MemberExpr *E) {
12317     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12318       return false;
12319     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12320          llvm::reverse(LPM)) {
12321       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12322       if (It == D.DeclToUniqueName.end())
12323         continue;
12324       if (D.Disabled)
12325         return false;
12326       FoundE = E;
12327       FoundD = E->getMemberDecl()->getCanonicalDecl();
12328       UniqueDeclName = It->second;
12329       IVLVal = D.IVLVal;
12330       FoundFn = D.Fn;
12331       break;
12332     }
12333     return FoundE == E;
12334   }
12335   bool VisitStmt(const Stmt *S) {
12336     for (const Stmt *Child : S->children()) {
12337       if (!Child)
12338         continue;
12339       if (const auto *E = dyn_cast<Expr>(Child))
12340         if (!E->isGLValue())
12341           continue;
12342       if (Visit(Child))
12343         return true;
12344     }
12345     return false;
12346   }
12347   explicit LastprivateConditionalRefChecker(
12348       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12349       : LPM(LPM) {}
12350   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12351   getFoundData() const {
12352     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12353   }
12354 };
12355 } // namespace
12356 
/// Emits the conditional update of the "last value" storage of a lastprivate
/// conditional variable.
///
/// Two internal variables are obtained via getOrCreateInternalVariable(): one
/// named \p UniqueDeclName holding the last value, and one whose name is
/// built from \p UniqueDeclName and "iv" holding the iteration value of the
/// last update. The emitted code copies the current iteration value (loaded
/// from \p IVLVal) and the private value (\p LVal) into them when the stored
/// iteration value is <= the current one. The update is wrapped in a critical
/// region named \p UniqueDeclName, except in OpenMPSimd mode where it is
/// emitted inline.
///
/// Only scalar and complex values are supported; aggregates hit
/// llvm_unreachable.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Note: loaded here, before the region body is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison signedness follows the iteration variable's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12443 
/// If \p LHS references an active lastprivate conditional variable, emits the
/// bookkeeping required after a store to it.
///
/// Does nothing for OpenMP versions below 5.0, when the stack is empty, or
/// when the checker finds no tracked reference in \p LHS. When the matching
/// stack entry belongs to a different function than the one being emitted,
/// only the "Fired" flag of the wrapper record is set (atomically);
/// otherwise the full conditional update is emitted via
/// emitLastprivateConditionalUpdate().
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    // Tuple layout: <record type, value field, fired field, base lvalue>.
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    // Reinterpret the address of the private variable as the address of the
    // wrapper record to reach its Fired field.
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12486 
/// For directive \p D, emits a guarded update for every lastprivate
/// conditional variable of the innermost active (non-disabled) stack entry
/// that is captured by the last capture region of \p D and is not listed in
/// \p IgnoredDecls.
///
/// For each such variable the emitted code tests the "Fired" flag of its
/// wrapper record and, if it is non-zero, performs the conditional update via
/// emitLastprivateConditionalUpdate(). Nothing is emitted for OpenMP versions
/// below 5.0, when the stack is empty, when no active entry exists, or when
/// the active entry belongs to a different function.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost entry that is not disabled.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    // Skip variables the region does not capture or that the caller asked to
    // ignore.
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    // Reference-typed variables are loaded through the reference first.
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}
12536 
12537 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12538     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12539     SourceLocation Loc) {
12540   if (CGF.getLangOpts().OpenMP < 50)
12541     return;
12542   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12543   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12544          "Unknown lastprivate conditional variable.");
12545   StringRef UniqueName = It->second;
12546   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12547   // The variable was not updated in the region - exit.
12548   if (!GV)
12549     return;
12550   LValue LPLVal = CGF.MakeAddrLValue(
12551       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12552   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12553   CGF.EmitStoreOfScalar(Res, PrivLVal);
12554 }
12555 
/// SIMD-only runtime: outlining a 'parallel' region is not supported; this
/// entry point is marked unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12561 
/// SIMD-only runtime: outlining a 'teams' region is not supported; this
/// entry point is marked unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12567 
/// SIMD-only runtime: outlining a task region is not supported; this entry
/// point is marked unreachable. \p NumberOfParts is not written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12575 
/// SIMD-only runtime: emitting a call to an outlined parallel function is not
/// supported; this entry point is marked unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12583 
/// SIMD-only runtime: critical regions are not supported; this entry point is
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12590 
/// SIMD-only runtime: 'master' regions are not supported; this entry point is
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12596 
/// SIMD-only runtime: 'taskyield' is not supported; this entry point is
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12601 
/// SIMD-only runtime: 'taskgroup' regions are not supported; this entry point
/// is marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12607 
/// SIMD-only runtime: 'single' regions are not supported; this entry point is
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12615 
/// SIMD-only runtime: 'ordered' regions are not supported; this entry point
/// is marked unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12622 
/// SIMD-only runtime: barriers are not supported; this entry point is marked
/// unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12630 
/// SIMD-only runtime: dispatch initialization for worksharing loops is not
/// supported; this entry point is marked unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12637 
/// SIMD-only runtime: static-schedule initialization for worksharing loops is
/// not supported; this entry point is marked unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12643 
/// SIMD-only runtime: static-schedule initialization for 'distribute' is not
/// supported; this entry point is marked unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12649 
12650 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12651                                                      SourceLocation Loc,
12652                                                      unsigned IVSize,
12653                                                      bool IVSigned) {
12654   llvm_unreachable("Not supported in SIMD-only mode");
12655 }
12656 
12657 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12658                                               SourceLocation Loc,
12659                                               OpenMPDirectiveKind DKind) {
12660   llvm_unreachable("Not supported in SIMD-only mode");
12661 }
12662 
12663 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12664                                               SourceLocation Loc,
12665                                               unsigned IVSize, bool IVSigned,
12666                                               Address IL, Address LB,
12667                                               Address UB, Address ST) {
12668   llvm_unreachable("Not supported in SIMD-only mode");
12669 }
12670 
12671 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12672                                                llvm::Value *NumThreads,
12673                                                SourceLocation Loc) {
12674   llvm_unreachable("Not supported in SIMD-only mode");
12675 }
12676 
12677 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12678                                              ProcBindKind ProcBind,
12679                                              SourceLocation Loc) {
12680   llvm_unreachable("Not supported in SIMD-only mode");
12681 }
12682 
12683 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12684                                                     const VarDecl *VD,
12685                                                     Address VDAddr,
12686                                                     SourceLocation Loc) {
12687   llvm_unreachable("Not supported in SIMD-only mode");
12688 }
12689 
12690 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12691     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12692     CodeGenFunction *CGF) {
12693   llvm_unreachable("Not supported in SIMD-only mode");
12694 }
12695 
12696 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12697     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12698   llvm_unreachable("Not supported in SIMD-only mode");
12699 }
12700 
12701 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12702                                     ArrayRef<const Expr *> Vars,
12703                                     SourceLocation Loc,
12704                                     llvm::AtomicOrdering AO) {
12705   llvm_unreachable("Not supported in SIMD-only mode");
12706 }
12707 
12708 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12709                                        const OMPExecutableDirective &D,
12710                                        llvm::Function *TaskFunction,
12711                                        QualType SharedsTy, Address Shareds,
12712                                        const Expr *IfCond,
12713                                        const OMPTaskDataTy &Data) {
12714   llvm_unreachable("Not supported in SIMD-only mode");
12715 }
12716 
12717 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12718     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12719     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12720     const Expr *IfCond, const OMPTaskDataTy &Data) {
12721   llvm_unreachable("Not supported in SIMD-only mode");
12722 }
12723 
12724 void CGOpenMPSIMDRuntime::emitReduction(
12725     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12726     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12727     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12728   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12729   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12730                                  ReductionOps, Options);
12731 }
12732 
12733 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12734     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12735     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12736   llvm_unreachable("Not supported in SIMD-only mode");
12737 }
12738 
12739 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12740                                                 SourceLocation Loc,
12741                                                 bool IsWorksharingReduction) {
12742   llvm_unreachable("Not supported in SIMD-only mode");
12743 }
12744 
12745 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12746                                                   SourceLocation Loc,
12747                                                   ReductionCodeGen &RCG,
12748                                                   unsigned N) {
12749   llvm_unreachable("Not supported in SIMD-only mode");
12750 }
12751 
12752 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12753                                                   SourceLocation Loc,
12754                                                   llvm::Value *ReductionsPtr,
12755                                                   LValue SharedLVal) {
12756   llvm_unreachable("Not supported in SIMD-only mode");
12757 }
12758 
12759 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12760                                            SourceLocation Loc) {
12761   llvm_unreachable("Not supported in SIMD-only mode");
12762 }
12763 
12764 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12765     CodeGenFunction &CGF, SourceLocation Loc,
12766     OpenMPDirectiveKind CancelRegion) {
12767   llvm_unreachable("Not supported in SIMD-only mode");
12768 }
12769 
12770 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12771                                          SourceLocation Loc, const Expr *IfCond,
12772                                          OpenMPDirectiveKind CancelRegion) {
12773   llvm_unreachable("Not supported in SIMD-only mode");
12774 }
12775 
12776 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12777     const OMPExecutableDirective &D, StringRef ParentName,
12778     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12779     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12780   llvm_unreachable("Not supported in SIMD-only mode");
12781 }
12782 
12783 void CGOpenMPSIMDRuntime::emitTargetCall(
12784     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12785     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12786     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12787     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12788                                      const OMPLoopDirective &D)>
12789         SizeEmitter) {
12790   llvm_unreachable("Not supported in SIMD-only mode");
12791 }
12792 
12793 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12794   llvm_unreachable("Not supported in SIMD-only mode");
12795 }
12796 
12797 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12798   llvm_unreachable("Not supported in SIMD-only mode");
12799 }
12800 
12801 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12802   return false;
12803 }
12804 
12805 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12806                                         const OMPExecutableDirective &D,
12807                                         SourceLocation Loc,
12808                                         llvm::Function *OutlinedFn,
12809                                         ArrayRef<llvm::Value *> CapturedVars) {
12810   llvm_unreachable("Not supported in SIMD-only mode");
12811 }
12812 
12813 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12814                                              const Expr *NumTeams,
12815                                              const Expr *ThreadLimit,
12816                                              SourceLocation Loc) {
12817   llvm_unreachable("Not supported in SIMD-only mode");
12818 }
12819 
12820 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12821     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12822     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12823   llvm_unreachable("Not supported in SIMD-only mode");
12824 }
12825 
12826 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12827     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12828     const Expr *Device) {
12829   llvm_unreachable("Not supported in SIMD-only mode");
12830 }
12831 
12832 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12833                                            const OMPLoopDirective &D,
12834                                            ArrayRef<Expr *> NumIterations) {
12835   llvm_unreachable("Not supported in SIMD-only mode");
12836 }
12837 
12838 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12839                                               const OMPDependClause *C) {
12840   llvm_unreachable("Not supported in SIMD-only mode");
12841 }
12842 
12843 const VarDecl *
12844 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12845                                         const VarDecl *NativeParam) const {
12846   llvm_unreachable("Not supported in SIMD-only mode");
12847 }
12848 
12849 Address
12850 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12851                                          const VarDecl *NativeParam,
12852                                          const VarDecl *TargetParam) const {
12853   llvm_unreachable("Not supported in SIMD-only mode");
12854 }
12855