1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
443 /// Values for bit flags used in the ident_t to describe the fields.
444 /// All enumeric elements are named and described in accordance with the code
445 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
446 enum OpenMPLocationFlags : unsigned {
447   /// Use trampoline for internal microtask.
448   OMP_IDENT_IMD = 0x01,
449   /// Use c-style ident structure.
450   OMP_IDENT_KMPC = 0x02,
451   /// Atomic reduction option for kmpc_reduce.
452   OMP_ATOMIC_REDUCE = 0x10,
453   /// Explicit 'barrier' directive.
454   OMP_IDENT_BARRIER_EXPL = 0x20,
455   /// Implicit barrier in code.
456   OMP_IDENT_BARRIER_IMPL = 0x40,
457   /// Implicit barrier in 'for' directive.
458   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459   /// Implicit barrier in 'sections' directive.
460   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461   /// Implicit barrier in 'single' directive.
462   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463   /// Call of __kmp_for_static_init for static loop.
464   OMP_IDENT_WORK_LOOP = 0x200,
465   /// Call of __kmp_for_static_init for sections.
466   OMP_IDENT_WORK_SECTIONS = 0x400,
467   /// Call of __kmp_for_static_init for distribute.
468   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470 };
471 
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474 /// Values for bit flags for marking which requires clauses have been used.
475 enum OpenMPOffloadingRequiresDirFlags : int64_t {
476   /// flag undefined.
477   OMP_REQ_UNDEFINED               = 0x000,
478   /// no requires clause present.
479   OMP_REQ_NONE                    = 0x001,
480   /// reverse_offload clause.
481   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482   /// unified_address clause.
483   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484   /// unified_shared_memory clause.
485   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486   /// dynamic_allocators clause.
487   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489 };
490 
491 enum OpenMPOffloadingReservedDeviceIDs {
492   /// Device ID if the device was not defined, runtime should get it
493   /// from environment variables in the spec.
494   OMP_DEVICEID_UNDEF = -1,
495 };
496 } // anonymous namespace
497 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Field reserved_1: might be used in Fortran.
  IdentField_Reserved_1,
  /// Field flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union
  /// member.
  IdentField_Flags,
  /// Field reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2,
  /// Field reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// Field psource: string describing the source location. The string is
  /// composed of semi-colon separated fields which describe the source file,
  /// the function and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// The default schedule is the plain static one.
  OMP_sch_default = OMP_sch_static,
  /// Schedule types for dist_schedule.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
663 /// Emit initialization of arrays of complex types.
664 /// \param DestAddr Address of the array.
665 /// \param Type Type of array.
666 /// \param Init Initial expression of array.
667 /// \param SrcAddr Address of the original array.
668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669                                  QualType Type, bool EmitDeclareReductionInit,
670                                  const Expr *Init,
671                                  const OMPDeclareReductionDecl *DRD,
672                                  Address SrcAddr = Address::invalid()) {
673   // Perform element-by-element initialization.
674   QualType ElementTy;
675 
676   // Drill down to the base element type on both arrays.
677   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679   DestAddr =
680       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681   if (DRD)
682     SrcAddr =
683         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684 
685   llvm::Value *SrcBegin = nullptr;
686   if (DRD)
687     SrcBegin = SrcAddr.getPointer();
688   llvm::Value *DestBegin = DestAddr.getPointer();
689   // Cast from pointer to array type to pointer to single element.
690   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691   // The basic structure here is a while-do loop.
692   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
694   llvm::Value *IsEmpty =
695       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697 
698   // Enter the loop body, making that address the current address.
699   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700   CGF.EmitBlock(BodyBB);
701 
702   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703 
704   llvm::PHINode *SrcElementPHI = nullptr;
705   Address SrcElementCurrent = Address::invalid();
706   if (DRD) {
707     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708                                           "omp.arraycpy.srcElementPast");
709     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710     SrcElementCurrent =
711         Address(SrcElementPHI,
712                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713   }
714   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716   DestElementPHI->addIncoming(DestBegin, EntryBB);
717   Address DestElementCurrent =
718       Address(DestElementPHI,
719               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720 
721   // Emit copy.
722   {
723     CodeGenFunction::RunCleanupsScope InitScope(CGF);
724     if (EmitDeclareReductionInit) {
725       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726                                        SrcElementCurrent, ElementTy);
727     } else
728       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729                            /*IsInitializer=*/false);
730   }
731 
732   if (DRD) {
733     // Shift the address forward by one element.
734     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737   }
738 
739   // Shift the address forward by one element.
740   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742   // Check whether we've reached the end.
743   llvm::Value *Done =
744       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747 
748   // Done.
749   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750 }
751 
/// Emits the lvalue of the shared reduction item \p E by forwarding to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
/// Emits the initialization of the private copy of the N-th reduction item.
///
/// \param CGF Code generation state of the current function.
/// \param N Index of the reduction item; its shared lvalue must already be
/// present in SharedAddresses.
/// \param PrivateAddr Address of the private copy; re-cast to the memory type
/// of the private variable before use.
/// \param SharedLVal LValue of the shared item; rebuilt with the type, base
/// info and TBAA info of the lvalue recorded in SharedAddresses.
/// \param DefaultInit Callback run as part of the initialization. Its result
/// is only inspected on the last path below: a true result suppresses
/// emission of the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Non-null only when the reduction op resolves to a declare-reduction decl
  // (as determined by getReductionInit).
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: DefaultInit is only run when the declare-reduction provides
    // an initializer; the aggregate initialization is emitted in either case.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item initialized through the declare-reduction path.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returned false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
946                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
947                           llvm::Value *Addr) {
948   Address Tmp = Address::invalid();
949   Address TopTmp = Address::invalid();
950   Address MostTopTmp = Address::invalid();
951   BaseTy = BaseTy.getNonReferenceType();
952   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
953          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
954     Tmp = CGF.CreateMemTemp(BaseTy);
955     if (TopTmp.isValid())
956       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
957     else
958       MostTopTmp = Tmp;
959     TopTmp = Tmp;
960     BaseTy = BaseTy->getPointeeType();
961   }
962   llvm::Type *Ty = BaseLVType;
963   if (Tmp.isValid())
964     Ty = Tmp.getElementType();
965   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
966   if (Tmp.isValid()) {
967     CGF.Builder.CreateStore(Addr, Tmp);
968     return MostTopTmp;
969   }
970   return Address(Addr, BaseLVAlignment);
971 }
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
/// Constructs the OpenMP runtime code generator for module \p CGM.
///
/// \param CGM Module being generated; also used to construct the
/// OpenMPIRBuilder and the offload entries manager.
/// \param FirstSeparator Separator emitted before the first name part in
/// getName().
/// \param Separator Separator emitted before every subsequent name part in
/// getName().
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-section name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emits an internal helper function for a user-defined reduction: the
/// combiner when \p IsCombiner is true, the initializer otherwise.
///
/// \param CGM Module the function is created in.
/// \param Ty Reduction type; both parameters are restrict-qualified pointers
/// to it.
/// \param CombinerInitializer Expression emitted (result ignored) as the
/// function body; may be null, in which case only the optional
/// initialization of \p Out is emitted.
/// \param In Variable privatized to the pointee of the second parameter.
/// \param Out Variable privatized to the pointee of the first parameter.
/// \param IsCombiner Selects the function name; when false, a non-trivial
/// initializer of \p Out is emitted before \p CombinerInitializer.
/// \returns The created function, which has internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out_parm, Ty *omp_in_parm);
  // Note the parameter order: the 'out' parameter is pushed first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace any noinline/optnone attributes with
  // alwaysinline on the helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers only: emit the non-trivial initializer attached to the
  // 'Out' variable into its (privatized) storage first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
/// Emits the outlined function for a task-based directive.
///
/// \param D Directive being outlined. Its 'taskloop' captured statement is
/// used for taskloop directives, its 'task' captured statement otherwise.
/// \param ThreadIDVar Thread id variable; must NOT have pointer type.
/// \param PartIDVar Passed to the untied-task action.
/// \param TaskTVar Pointer-typed variable; its pointee is passed as the task
/// argument of the __kmpc_omp_task call emitted by the untied code generator.
/// \param InnermostKind Passed on to the task outlined region info.
/// \param CodeGen Generator for the region body; the untied-task action is
/// installed on it.
/// \param Tied Passed to the untied-task action.
/// \param [out] NumberOfParts Written only when \p Tied is false; receives
/// the action's number of parts after the function has been generated.
/// \returns The generated outlined function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits a __kmpc_omp_task(loc, thread id, task) runtime call.
  // NOTE(review): presumably invoked by UntiedTaskActionTy at the switch
  // points of an untied task - confirm against that class.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Select the captured statement that belongs to the task region.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds listed below are queried for a cancel; all
  // others keep HasCancel == false.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only reported back for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Return the OpenMP global thread id value to use at the current insertion
/// point of \p CGF.
///
/// Sources are tried in this order:
///  1. With the OpenMPIRBuilder language option, the request is forwarded to
///     OMPBuilder unconditionally.
///  2. A value already cached for the current function in
///     OpenMPLocThreadIDMap.
///  3. A load from the thread-id variable of the enclosing OpenMP region, when
///     the checks below allow it.
///  4. A call to __kmpc_global_thread_num emitted at the service insertion
///     point, which is then cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that holds the alloca insertion point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The load is emitted here unless ALL of the following hold: a landing
      // pad is required, both Exceptions and CXXExceptions are enabled, we are
      // not emitting into TopBlock, and the thread-id pointer is an
      // instruction that lives in a block other than TopBlock and other than
      // the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insertion point on first use.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard restores the builder's insertion point when this function
  // returns; the call itself is emitted at the service insertion point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
/// Emit the constructor/destructor helpers for the threadprivate variable
/// \p VD and register them with the OpenMP runtime.
///
/// \param VD          The threadprivate variable; its definition is used.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit Whether a constructor helper that re-emits the
///                    initializer should be generated (C++ only).
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function; otherwise a dedicated init function is
///                    created.
/// \returns The newly created init function when \p CGF is null and a helper
/// was needed. Returns nullptr when TLS is used, when \p VD has no definition
/// or was already processed, when neither helper is needed, or when the
/// registration was emitted into \p CGF.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable is implemented with native TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The set insertion succeeds only the first time a given mangled name is
  // seen, so each variable is processed at most once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Treat the incoming pointer as an object of the variable's type, using
      // the alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration in a
      // standalone void() function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869 
/// Emit and register the offload constructor/destructor entries for the
/// declare-target variable \p VD whose storage is \p Addr.
///
/// \param VD          The declare-target variable; its definition is used.
/// \param Addr        Global variable that holds the variable's storage.
/// \param PerformInit Whether a constructor entry should be produced (C++
///                    only).
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of the OpenMPIsDevice language
/// option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for variables that are not declare-target, for 'link'
  // entries, and for 'to' entries under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // The set insertion fails if this mangled name was already handled.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common name prefix; Out is scratch space for the full
  // "_ctor"/"_dtor" entry names built below.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the variable VD at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      // The entry ID is the function itself, viewed as an i8*.
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Not a device compilation: a private constant i8 global with the same
      // "_ctor" name stands in for the constructor and doubles as its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the storage of the
      // variable VD at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      // The entry ID is the function itself, viewed as an i8*.
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Not a device compilation: a private constant i8 global with the same
      // "_dtor" name stands in for the destructor and doubles as its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056                                        llvm::Function *OutlinedFn,
2057                                        ArrayRef<llvm::Value *> CapturedVars,
2058                                        const Expr *IfCond) {
2059   if (!CGF.HaveInsertPoint())
2060     return;
2061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062   auto &M = CGM.getModule();
2063   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2065     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *Args[] = {
2068         RTLoc,
2069         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072     RealArgs.append(std::begin(Args), std::end(Args));
2073     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074 
2075     llvm::FunctionCallee RTLFn =
2076         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078   };
2079   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083     // Build calls:
2084     // __kmpc_serialized_parallel(&Loc, GTid);
2085     llvm::Value *Args[] = {RTLoc, ThreadID};
2086     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087                             M, OMPRTL___kmpc_serialized_parallel),
2088                         Args);
2089 
2090     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092     Address ZeroAddrBound =
2093         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094                                          /*Name=*/".bound.zero.addr");
2095     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097     // ThreadId for serialized parallels is 0.
2098     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101 
2102     // Ensure we do not inline the function. This is trivially true for the ones
2103     // passed to __kmpc_fork_call but the ones calles in serialized regions
2104     // could be inlined. This is not a perfect but it is closer to the invariant
2105     // we want, namely, every data environment starts with a new function.
2106     // TODO: We should pass the if condition to the runtime function and do the
2107     //       handling there. Much cleaner code.
2108     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2109     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2110 
2111     // __kmpc_end_serialized_parallel(&Loc, GTid);
2112     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2113     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2114                             M, OMPRTL___kmpc_end_serialized_parallel),
2115                         EndArgs);
2116   };
2117   if (IfCond) {
2118     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2119   } else {
2120     RegionCodeGenTy ThenRCG(ThenGen);
2121     ThenRCG(CGF);
2122   }
2123 }
2124 
2125 // If we're inside an (outlined) parallel region, use the region info's
2126 // thread-ID variable (it is passed in a first argument of the outlined function
2127 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2128 // regular serial code region, get thread ID by calling kmp_int32
2129 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2130 // return the address of that temp.
2131 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2132                                              SourceLocation Loc) {
2133   if (auto *OMPRegionInfo =
2134           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2135     if (OMPRegionInfo->getThreadIDVariable())
2136       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2137 
2138   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2139   QualType Int32Ty =
2140       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2141   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2142   CGF.EmitStoreOfScalar(ThreadID,
2143                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2144 
2145   return ThreadIDTemp;
2146 }
2147 
2148 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2149     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2150   SmallString<256> Buffer;
2151   llvm::raw_svector_ostream Out(Buffer);
2152   Out << Name;
2153   StringRef RuntimeName = Out.str();
2154   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2155   if (Elem.second) {
2156     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2157            "OMP internal variable has different type than requested");
2158     return &*Elem.second;
2159   }
2160 
2161   return Elem.second = new llvm::GlobalVariable(
2162              CGM.getModule(), Ty, /*IsConstant*/ false,
2163              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2164              Elem.first(), /*InsertBefore=*/nullptr,
2165              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2166 }
2167 
2168 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2169   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2170   std::string Name = getName({Prefix, "var"});
2171   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2172 }
2173 
2174 namespace {
2175 /// Common pre(post)-action for different OpenMP constructs.
2176 class CommonActionTy final : public PrePostActionTy {
2177   llvm::FunctionCallee EnterCallee;
2178   ArrayRef<llvm::Value *> EnterArgs;
2179   llvm::FunctionCallee ExitCallee;
2180   ArrayRef<llvm::Value *> ExitArgs;
2181   bool Conditional;
2182   llvm::BasicBlock *ContBlock = nullptr;
2183 
2184 public:
2185   CommonActionTy(llvm::FunctionCallee EnterCallee,
2186                  ArrayRef<llvm::Value *> EnterArgs,
2187                  llvm::FunctionCallee ExitCallee,
2188                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2189       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2190         ExitArgs(ExitArgs), Conditional(Conditional) {}
2191   void Enter(CodeGenFunction &CGF) override {
2192     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2193     if (Conditional) {
2194       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2195       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2196       ContBlock = CGF.createBasicBlock("omp_if.end");
2197       // Generate the branch (If-stmt)
2198       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2199       CGF.EmitBlock(ThenBlock);
2200     }
2201   }
2202   void Done(CodeGenFunction &CGF) {
2203     // Emit the rest of blocks/branches
2204     CGF.EmitBranch(ContBlock);
2205     CGF.EmitBlock(ContBlock, true);
2206   }
2207   void Exit(CodeGenFunction &CGF) override {
2208     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2209   }
2210 };
2211 } // anonymous namespace
2212 
2213 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2214                                          StringRef CriticalName,
2215                                          const RegionCodeGenTy &CriticalOpGen,
2216                                          SourceLocation Loc, const Expr *Hint) {
2217   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2218   // CriticalOpGen();
2219   // __kmpc_end_critical(ident_t *, gtid, Lock);
2220   // Prepare arguments and build a call to __kmpc_critical
2221   if (!CGF.HaveInsertPoint())
2222     return;
2223   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2224                          getCriticalRegionLock(CriticalName)};
2225   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2226                                                 std::end(Args));
2227   if (Hint) {
2228     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2229         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2230   }
2231   CommonActionTy Action(
2232       OMPBuilder.getOrCreateRuntimeFunction(
2233           CGM.getModule(),
2234           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2235       EnterArgs,
2236       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2237                                             OMPRTL___kmpc_end_critical),
2238       Args);
2239   CriticalOpGen.setAction(Action);
2240   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2241 }
2242 
2243 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2244                                        const RegionCodeGenTy &MasterOpGen,
2245                                        SourceLocation Loc) {
2246   if (!CGF.HaveInsertPoint())
2247     return;
2248   // if(__kmpc_master(ident_t *, gtid)) {
2249   //   MasterOpGen();
2250   //   __kmpc_end_master(ident_t *, gtid);
2251   // }
2252   // Prepare arguments and build a call to __kmpc_master
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2254   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2255                             CGM.getModule(), OMPRTL___kmpc_master),
2256                         Args,
2257                         OMPBuilder.getOrCreateRuntimeFunction(
2258                             CGM.getModule(), OMPRTL___kmpc_end_master),
2259                         Args,
2260                         /*Conditional=*/true);
2261   MasterOpGen.setAction(Action);
2262   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2263   Action.Done(CGF);
2264 }
2265 
2266 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2267                                         SourceLocation Loc) {
2268   if (!CGF.HaveInsertPoint())
2269     return;
2270   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2271     OMPBuilder.createTaskyield(CGF.Builder);
2272   } else {
2273     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2274     llvm::Value *Args[] = {
2275         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2276         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2277     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2278                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2279                         Args);
2280   }
2281 
2282   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2283     Region->emitUntiedSwitch(CGF);
2284 }
2285 
2286 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2287                                           const RegionCodeGenTy &TaskgroupOpGen,
2288                                           SourceLocation Loc) {
2289   if (!CGF.HaveInsertPoint())
2290     return;
2291   // __kmpc_taskgroup(ident_t *, gtid);
2292   // TaskgroupOpGen();
2293   // __kmpc_end_taskgroup(ident_t *, gtid);
2294   // Prepare arguments and build a call to __kmpc_taskgroup
2295   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2298                         Args,
2299                         OMPBuilder.getOrCreateRuntimeFunction(
2300                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2301                         Args);
2302   TaskgroupOpGen.setAction(Action);
2303   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2304 }
2305 
2306 /// Given an array of pointers to variables, project the address of a
2307 /// given variable.
2308 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2309                                       unsigned Index, const VarDecl *Var) {
2310   // Pull out the pointer to the variable.
2311   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2312   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2313 
2314   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2315   Addr = CGF.Builder.CreateElementBitCast(
2316       Addr, CGF.ConvertTypeForMem(Var->getType()));
2317   return Addr;
2318 }
2319 
2320 static llvm::Value *emitCopyprivateCopyFunction(
2321     CodeGenModule &CGM, llvm::Type *ArgsType,
2322     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2323     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2324     SourceLocation Loc) {
2325   ASTContext &C = CGM.getContext();
2326   // void copy_func(void *LHSArg, void *RHSArg);
2327   FunctionArgList Args;
2328   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2329                            ImplicitParamDecl::Other);
2330   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2331                            ImplicitParamDecl::Other);
2332   Args.push_back(&LHSArg);
2333   Args.push_back(&RHSArg);
2334   const auto &CGFI =
2335       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2336   std::string Name =
2337       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2338   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2339                                     llvm::GlobalValue::InternalLinkage, Name,
2340                                     &CGM.getModule());
2341   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2342   Fn->setDoesNotRecurse();
2343   CodeGenFunction CGF(CGM);
2344   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2345   // Dest = (void*[n])(LHSArg);
2346   // Src = (void*[n])(RHSArg);
2347   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2348       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2349       ArgsType), CGF.getPointerAlign());
2350   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2351       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2352       ArgsType), CGF.getPointerAlign());
2353   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2354   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2355   // ...
2356   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2357   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2358     const auto *DestVar =
2359         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2360     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2361 
2362     const auto *SrcVar =
2363         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2364     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2365 
2366     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2367     QualType Type = VD->getType();
2368     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2369   }
2370   CGF.FinishFunction();
2371   return Fn;
2372 }
2373 
2374 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2375                                        const RegionCodeGenTy &SingleOpGen,
2376                                        SourceLocation Loc,
2377                                        ArrayRef<const Expr *> CopyprivateVars,
2378                                        ArrayRef<const Expr *> SrcExprs,
2379                                        ArrayRef<const Expr *> DstExprs,
2380                                        ArrayRef<const Expr *> AssignmentOps) {
2381   if (!CGF.HaveInsertPoint())
2382     return;
2383   assert(CopyprivateVars.size() == SrcExprs.size() &&
2384          CopyprivateVars.size() == DstExprs.size() &&
2385          CopyprivateVars.size() == AssignmentOps.size());
2386   ASTContext &C = CGM.getContext();
2387   // int32 did_it = 0;
2388   // if(__kmpc_single(ident_t *, gtid)) {
2389   //   SingleOpGen();
2390   //   __kmpc_end_single(ident_t *, gtid);
2391   //   did_it = 1;
2392   // }
2393   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2394   // <copy_func>, did_it);
2395 
2396   Address DidIt = Address::invalid();
2397   if (!CopyprivateVars.empty()) {
2398     // int32 did_it = 0;
2399     QualType KmpInt32Ty =
2400         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2401     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2402     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2403   }
2404   // Prepare arguments and build a call to __kmpc_single
2405   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2406   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2407                             CGM.getModule(), OMPRTL___kmpc_single),
2408                         Args,
2409                         OMPBuilder.getOrCreateRuntimeFunction(
2410                             CGM.getModule(), OMPRTL___kmpc_end_single),
2411                         Args,
2412                         /*Conditional=*/true);
2413   SingleOpGen.setAction(Action);
2414   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2415   if (DidIt.isValid()) {
2416     // did_it = 1;
2417     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2418   }
2419   Action.Done(CGF);
2420   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2421   // <copy_func>, did_it);
2422   if (DidIt.isValid()) {
2423     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2424     QualType CopyprivateArrayTy = C.getConstantArrayType(
2425         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2426         /*IndexTypeQuals=*/0);
2427     // Create a list of all private variables for copyprivate.
2428     Address CopyprivateList =
2429         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2430     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2431       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2432       CGF.Builder.CreateStore(
2433           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2434               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2435               CGF.VoidPtrTy),
2436           Elem);
2437     }
2438     // Build function that copies private values from single region to all other
2439     // threads in the corresponding parallel region.
2440     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2441         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2442         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2443     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2444     Address CL =
2445       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2446                                                       CGF.VoidPtrTy);
2447     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2448     llvm::Value *Args[] = {
2449         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2450         getThreadID(CGF, Loc),        // i32 <gtid>
2451         BufSize,                      // size_t <buf_size>
2452         CL.getPointer(),              // void *<copyprivate list>
2453         CpyFn,                        // void (*) (void *, void *) <copy_func>
2454         DidItVal                      // i32 did_it
2455     };
2456     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2457                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2458                         Args);
2459   }
2460 }
2461 
2462 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2463                                         const RegionCodeGenTy &OrderedOpGen,
2464                                         SourceLocation Loc, bool IsThreads) {
2465   if (!CGF.HaveInsertPoint())
2466     return;
2467   // __kmpc_ordered(ident_t *, gtid);
2468   // OrderedOpGen();
2469   // __kmpc_end_ordered(ident_t *, gtid);
2470   // Prepare arguments and build a call to __kmpc_ordered
2471   if (IsThreads) {
2472     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2473     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2474                               CGM.getModule(), OMPRTL___kmpc_ordered),
2475                           Args,
2476                           OMPBuilder.getOrCreateRuntimeFunction(
2477                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2478                           Args);
2479     OrderedOpGen.setAction(Action);
2480     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2481     return;
2482   }
2483   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2484 }
2485 
2486 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2487   unsigned Flags;
2488   if (Kind == OMPD_for)
2489     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2490   else if (Kind == OMPD_sections)
2491     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2492   else if (Kind == OMPD_single)
2493     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2494   else if (Kind == OMPD_barrier)
2495     Flags = OMP_IDENT_BARRIER_EXPL;
2496   else
2497     Flags = OMP_IDENT_BARRIER_IMPL;
2498   return Flags;
2499 }
2500 
2501 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2502     CodeGenFunction &CGF, const OMPLoopDirective &S,
2503     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2504   // Check if the loop directive is actually a doacross loop directive. In this
2505   // case choose static, 1 schedule.
2506   if (llvm::any_of(
2507           S.getClausesOfKind<OMPOrderedClause>(),
2508           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2509     ScheduleKind = OMPC_SCHEDULE_static;
2510     // Chunk size is 1 in this case.
2511     llvm::APInt ChunkSize(32, 1);
2512     ChunkExpr = IntegerLiteral::Create(
2513         CGF.getContext(), ChunkSize,
2514         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2515         SourceLocation());
2516   }
2517 }
2518 
2519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2520                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2521                                       bool ForceSimpleCall) {
2522   // Check if we should use the OMPBuilder
2523   auto *OMPRegionInfo =
2524       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2525   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2526     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2527         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2528     return;
2529   }
2530 
2531   if (!CGF.HaveInsertPoint())
2532     return;
2533   // Build call __kmpc_cancel_barrier(loc, thread_id);
2534   // Build call __kmpc_barrier(loc, thread_id);
2535   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2536   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2537   // thread_id);
2538   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2539                          getThreadID(CGF, Loc)};
2540   if (OMPRegionInfo) {
2541     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2542       llvm::Value *Result = CGF.EmitRuntimeCall(
2543           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2544                                                 OMPRTL___kmpc_cancel_barrier),
2545           Args);
2546       if (EmitChecks) {
2547         // if (__kmpc_cancel_barrier()) {
2548         //   exit from construct;
2549         // }
2550         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2551         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2552         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2553         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2554         CGF.EmitBlock(ExitBB);
2555         //   exit from construct;
2556         CodeGenFunction::JumpDest CancelDestination =
2557             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2558         CGF.EmitBranchThroughCleanup(CancelDestination);
2559         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2560       }
2561       return;
2562     }
2563   }
2564   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2565                           CGM.getModule(), OMPRTL___kmpc_barrier),
2566                       Args);
2567 }
2568 
2569 /// Map the OpenMP loop schedule to the runtime enumeration.
2570 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2571                                           bool Chunked, bool Ordered) {
2572   switch (ScheduleKind) {
2573   case OMPC_SCHEDULE_static:
2574     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2575                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2576   case OMPC_SCHEDULE_dynamic:
2577     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2578   case OMPC_SCHEDULE_guided:
2579     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2580   case OMPC_SCHEDULE_runtime:
2581     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2582   case OMPC_SCHEDULE_auto:
2583     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2584   case OMPC_SCHEDULE_unknown:
2585     assert(!Chunked && "chunk was specified but schedule kind not known");
2586     return Ordered ? OMP_ord_static : OMP_sch_static;
2587   }
2588   llvm_unreachable("Unexpected runtime schedule");
2589 }
2590 
2591 /// Map the OpenMP distribute schedule to the runtime enumeration.
2592 static OpenMPSchedType
2593 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2594   // only static is allowed for dist_schedule
2595   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2596 }
2597 
2598 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2599                                          bool Chunked) const {
2600   OpenMPSchedType Schedule =
2601       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2602   return Schedule == OMP_sch_static;
2603 }
2604 
2605 bool CGOpenMPRuntime::isStaticNonchunked(
2606     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2607   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2608   return Schedule == OMP_dist_sch_static;
2609 }
2610 
2611 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2612                                       bool Chunked) const {
2613   OpenMPSchedType Schedule =
2614       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2615   return Schedule == OMP_sch_static_chunked;
2616 }
2617 
2618 bool CGOpenMPRuntime::isStaticChunked(
2619     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2620   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2621   return Schedule == OMP_dist_sch_static_chunked;
2622 }
2623 
2624 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2625   OpenMPSchedType Schedule =
2626       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2627   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2628   return Schedule != OMP_sch_static;
2629 }
2630 
2631 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2632                                   OpenMPScheduleClauseModifier M1,
2633                                   OpenMPScheduleClauseModifier M2) {
2634   int Modifier = 0;
2635   switch (M1) {
2636   case OMPC_SCHEDULE_MODIFIER_monotonic:
2637     Modifier = OMP_sch_modifier_monotonic;
2638     break;
2639   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2640     Modifier = OMP_sch_modifier_nonmonotonic;
2641     break;
2642   case OMPC_SCHEDULE_MODIFIER_simd:
2643     if (Schedule == OMP_sch_static_chunked)
2644       Schedule = OMP_sch_static_balanced_chunked;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_last:
2647   case OMPC_SCHEDULE_MODIFIER_unknown:
2648     break;
2649   }
2650   switch (M2) {
2651   case OMPC_SCHEDULE_MODIFIER_monotonic:
2652     Modifier = OMP_sch_modifier_monotonic;
2653     break;
2654   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2655     Modifier = OMP_sch_modifier_nonmonotonic;
2656     break;
2657   case OMPC_SCHEDULE_MODIFIER_simd:
2658     if (Schedule == OMP_sch_static_chunked)
2659       Schedule = OMP_sch_static_balanced_chunked;
2660     break;
2661   case OMPC_SCHEDULE_MODIFIER_last:
2662   case OMPC_SCHEDULE_MODIFIER_unknown:
2663     break;
2664   }
2665   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2666   // If the static schedule kind is specified or if the ordered clause is
2667   // specified, and if the nonmonotonic modifier is not specified, the effect is
2668   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2669   // modifier is specified, the effect is as if the nonmonotonic modifier is
2670   // specified.
2671   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2672     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2673           Schedule == OMP_sch_static_balanced_chunked ||
2674           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2675           Schedule == OMP_dist_sch_static_chunked ||
2676           Schedule == OMP_dist_sch_static))
2677       Modifier = OMP_sch_modifier_nonmonotonic;
2678   }
2679   return Schedule | Modifier;
2680 }
2681 
2682 void CGOpenMPRuntime::emitForDispatchInit(
2683     CodeGenFunction &CGF, SourceLocation Loc,
2684     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2685     bool Ordered, const DispatchRTInput &DispatchValues) {
2686   if (!CGF.HaveInsertPoint())
2687     return;
2688   OpenMPSchedType Schedule = getRuntimeSchedule(
2689       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2690   assert(Ordered ||
2691          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2692           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2693           Schedule != OMP_sch_static_balanced_chunked));
2694   // Call __kmpc_dispatch_init(
2695   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2696   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2697   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2698 
2699   // If the Chunk was not specified in the clause - use default value 1.
2700   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2701                                             : CGF.Builder.getIntN(IVSize, 1);
2702   llvm::Value *Args[] = {
2703       emitUpdateLocation(CGF, Loc),
2704       getThreadID(CGF, Loc),
2705       CGF.Builder.getInt32(addMonoNonMonoModifier(
2706           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2707       DispatchValues.LB,                                     // Lower
2708       DispatchValues.UB,                                     // Upper
2709       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2710       Chunk                                                  // Chunk
2711   };
2712   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2713 }
2714 
2715 static void emitForStaticInitCall(
2716     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2717     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2718     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2719     const CGOpenMPRuntime::StaticRTInput &Values) {
2720   if (!CGF.HaveInsertPoint())
2721     return;
2722 
2723   assert(!Values.Ordered);
2724   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2725          Schedule == OMP_sch_static_balanced_chunked ||
2726          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2727          Schedule == OMP_dist_sch_static ||
2728          Schedule == OMP_dist_sch_static_chunked);
2729 
2730   // Call __kmpc_for_static_init(
2731   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2732   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2733   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2734   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2735   llvm::Value *Chunk = Values.Chunk;
2736   if (Chunk == nullptr) {
2737     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2738             Schedule == OMP_dist_sch_static) &&
2739            "expected static non-chunked schedule");
2740     // If the Chunk was not specified in the clause - use default value 1.
2741     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2742   } else {
2743     assert((Schedule == OMP_sch_static_chunked ||
2744             Schedule == OMP_sch_static_balanced_chunked ||
2745             Schedule == OMP_ord_static_chunked ||
2746             Schedule == OMP_dist_sch_static_chunked) &&
2747            "expected static chunked schedule");
2748   }
2749   llvm::Value *Args[] = {
2750       UpdateLocation,
2751       ThreadId,
2752       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2753                                                   M2)), // Schedule type
2754       Values.IL.getPointer(),                           // &isLastIter
2755       Values.LB.getPointer(),                           // &LB
2756       Values.UB.getPointer(),                           // &UB
2757       Values.ST.getPointer(),                           // &Stride
2758       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2759       Chunk                                             // Chunk
2760   };
2761   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2762 }
2763 
2764 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2765                                         SourceLocation Loc,
2766                                         OpenMPDirectiveKind DKind,
2767                                         const OpenMPScheduleTy &ScheduleKind,
2768                                         const StaticRTInput &Values) {
2769   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2770       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2771   assert(isOpenMPWorksharingDirective(DKind) &&
2772          "Expected loop-based or sections-based directive.");
2773   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2774                                              isOpenMPLoopDirective(DKind)
2775                                                  ? OMP_IDENT_WORK_LOOP
2776                                                  : OMP_IDENT_WORK_SECTIONS);
2777   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2778   llvm::FunctionCallee StaticInitFunction =
2779       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2780   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2781   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2782                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2783 }
2784 
2785 void CGOpenMPRuntime::emitDistributeStaticInit(
2786     CodeGenFunction &CGF, SourceLocation Loc,
2787     OpenMPDistScheduleClauseKind SchedKind,
2788     const CGOpenMPRuntime::StaticRTInput &Values) {
2789   OpenMPSchedType ScheduleNum =
2790       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2791   llvm::Value *UpdatedLocation =
2792       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2793   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2794   llvm::FunctionCallee StaticInitFunction =
2795       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2796   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2797                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2798                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2799 }
2800 
2801 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2802                                           SourceLocation Loc,
2803                                           OpenMPDirectiveKind DKind) {
2804   if (!CGF.HaveInsertPoint())
2805     return;
2806   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2807   llvm::Value *Args[] = {
2808       emitUpdateLocation(CGF, Loc,
2809                          isOpenMPDistributeDirective(DKind)
2810                              ? OMP_IDENT_WORK_DISTRIBUTE
2811                              : isOpenMPLoopDirective(DKind)
2812                                    ? OMP_IDENT_WORK_LOOP
2813                                    : OMP_IDENT_WORK_SECTIONS),
2814       getThreadID(CGF, Loc)};
2815   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2816   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2817                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2818                       Args);
2819 }
2820 
2821 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2822                                                  SourceLocation Loc,
2823                                                  unsigned IVSize,
2824                                                  bool IVSigned) {
2825   if (!CGF.HaveInsertPoint())
2826     return;
2827   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2828   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2829   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2830 }
2831 
2832 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2833                                           SourceLocation Loc, unsigned IVSize,
2834                                           bool IVSigned, Address IL,
2835                                           Address LB, Address UB,
2836                                           Address ST) {
2837   // Call __kmpc_dispatch_next(
2838   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2839   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2840   //          kmp_int[32|64] *p_stride);
2841   llvm::Value *Args[] = {
2842       emitUpdateLocation(CGF, Loc),
2843       getThreadID(CGF, Loc),
2844       IL.getPointer(), // &isLastIter
2845       LB.getPointer(), // &Lower
2846       UB.getPointer(), // &Upper
2847       ST.getPointer()  // &Stride
2848   };
2849   llvm::Value *Call =
2850       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2851   return CGF.EmitScalarConversion(
2852       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2853       CGF.getContext().BoolTy, Loc);
2854 }
2855 
2856 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2857                                            llvm::Value *NumThreads,
2858                                            SourceLocation Loc) {
2859   if (!CGF.HaveInsertPoint())
2860     return;
2861   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2862   llvm::Value *Args[] = {
2863       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2864       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2865   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2866                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2867                       Args);
2868 }
2869 
2870 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2871                                          ProcBindKind ProcBind,
2872                                          SourceLocation Loc) {
2873   if (!CGF.HaveInsertPoint())
2874     return;
2875   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2876   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2877   llvm::Value *Args[] = {
2878       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2879       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2880   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2881                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2882                       Args);
2883 }
2884 
2885 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2886                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2887   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2888     OMPBuilder.createFlush(CGF.Builder);
2889   } else {
2890     if (!CGF.HaveInsertPoint())
2891       return;
2892     // Build call void __kmpc_flush(ident_t *loc)
2893     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2894                             CGM.getModule(), OMPRTL___kmpc_flush),
2895                         emitUpdateLocation(CGF, Loc));
2896   }
2897 }
2898 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit values, so they are numbered from zero in
/// declaration order.
/// NOTE(review): presumably this order has to stay in sync with the field
/// order of the kmp_task_t record built elsewhere in this file - confirm
/// before reordering or inserting enumerators.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (Generic name: the enumerator does not carry the "KmpTaskT" prefix.)
  Data1,
  /// Task priority.
  /// (Generic name: the enumerator does not carry the "KmpTaskT" prefix.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2924 
2925 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2926   return OffloadEntriesTargetRegion.empty() &&
2927          OffloadEntriesDeviceGlobalVar.empty();
2928 }
2929 
2930 /// Initialize target region entry.
2931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2932     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2933                                     StringRef ParentName, unsigned LineNum,
2934                                     unsigned Order) {
2935   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2936                                              "only required for the device "
2937                                              "code generation.");
2938   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2939       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2940                                    OMPTargetRegionEntryTargetRegion);
2941   ++OffloadingEntriesNum;
2942 }
2943 
2944 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2945     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2946                                   StringRef ParentName, unsigned LineNum,
2947                                   llvm::Constant *Addr, llvm::Constant *ID,
2948                                   OMPTargetRegionEntryKind Flags) {
2949   // If we are emitting code for a target, the entry is already initialized,
2950   // only has to be registered.
2951   if (CGM.getLangOpts().OpenMPIsDevice) {
2952     // This could happen if the device compilation is invoked standalone.
2953     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2954       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2955                                       OffloadingEntriesNum);
2956     auto &Entry =
2957         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2958     Entry.setAddress(Addr);
2959     Entry.setID(ID);
2960     Entry.setFlags(Flags);
2961   } else {
2962     if (Flags ==
2963             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2964         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2965                                  /*IgnoreAddressId*/ true))
2966       return;
2967     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2968            "Target region entry already registered!");
2969     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2970     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2971     ++OffloadingEntriesNum;
2972   }
2973 }
2974 
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2976     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2977     bool IgnoreAddressId) const {
2978   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2979   if (PerDevice == OffloadEntriesTargetRegion.end())
2980     return false;
2981   auto PerFile = PerDevice->second.find(FileID);
2982   if (PerFile == PerDevice->second.end())
2983     return false;
2984   auto PerParentName = PerFile->second.find(ParentName);
2985   if (PerParentName == PerFile->second.end())
2986     return false;
2987   auto PerLine = PerParentName->second.find(LineNum);
2988   if (PerLine == PerParentName->second.end())
2989     return false;
2990   // Fail if this entry is already registered.
2991   if (!IgnoreAddressId &&
2992       (PerLine->second.getAddress() || PerLine->second.getID()))
2993     return false;
2994   return true;
2995 }
2996 
2997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2998     const OffloadTargetRegionEntryInfoActTy &Action) {
2999   // Scan all target region entries and perform the provided action.
3000   for (const auto &D : OffloadEntriesTargetRegion)
3001     for (const auto &F : D.second)
3002       for (const auto &P : F.second)
3003         for (const auto &L : P.second)
3004           Action(D.first, F.first, P.first(), L.first, L.second);
3005 }
3006 
3007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3008     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3009                                        OMPTargetGlobalVarEntryKind Flags,
3010                                        unsigned Order) {
3011   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3012                                              "only required for the device "
3013                                              "code generation.");
3014   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3015   ++OffloadingEntriesNum;
3016 }
3017 
3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3019     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3020                                      CharUnits VarSize,
3021                                      OMPTargetGlobalVarEntryKind Flags,
3022                                      llvm::GlobalValue::LinkageTypes Linkage) {
3023   if (CGM.getLangOpts().OpenMPIsDevice) {
3024     // This could happen if the device compilation is invoked standalone.
3025     if (!hasDeviceGlobalVarEntryInfo(VarName))
3026       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3027     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3028     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3029            "Resetting with the new address.");
3030     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3031       if (Entry.getVarSize().isZero()) {
3032         Entry.setVarSize(VarSize);
3033         Entry.setLinkage(Linkage);
3034       }
3035       return;
3036     }
3037     Entry.setVarSize(VarSize);
3038     Entry.setLinkage(Linkage);
3039     Entry.setAddress(Addr);
3040   } else {
3041     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3042       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3043       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3044              "Entry not initialized!");
3045       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3046              "Resetting with the new address.");
3047       if (Entry.getVarSize().isZero()) {
3048         Entry.setVarSize(VarSize);
3049         Entry.setLinkage(Linkage);
3050       }
3051       return;
3052     }
3053     OffloadEntriesDeviceGlobalVar.try_emplace(
3054         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3055     ++OffloadingEntriesNum;
3056   }
3057 }
3058 
3059 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3060     actOnDeviceGlobalVarEntriesInfo(
3061         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3062   // Scan all target region entries and perform the provided action.
3063   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3064     Action(E.getKey(), E.getValue());
3065 }
3066 
3067 void CGOpenMPRuntime::createOffloadEntry(
3068     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3069     llvm::GlobalValue::LinkageTypes Linkage) {
3070   StringRef Name = Addr->getName();
3071   llvm::Module &M = CGM.getModule();
3072   llvm::LLVMContext &C = M.getContext();
3073 
3074   // Create constant string with the name.
3075   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3076 
3077   std::string StringName = getName({"omp_offloading", "entry_name"});
3078   auto *Str = new llvm::GlobalVariable(
3079       M, StrPtrInit->getType(), /*isConstant=*/true,
3080       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3081   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3082 
3083   llvm::Constant *Data[] = {
3084       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3085       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3086       llvm::ConstantInt::get(CGM.SizeTy, Size),
3087       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3088       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3089   std::string EntryName = getName({"omp_offloading", "entry", ""});
3090   llvm::GlobalVariable *Entry = createGlobalStruct(
3091       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3092       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3093 
3094   // The entry has to be created in the section the linker expects it to be.
3095   Entry->setSection("omp_offloading_entries");
3096 }
3097 
/// Emits the "omp_offload.info" named metadata and the offload entry globals
/// for everything recorded in OffloadEntriesInfoManager.
///
/// Works in two passes: first every target region and device global variable
/// entry is turned into a metadata node and stored in a vector indexed by the
/// entry's creation order; then that vector is walked in order, diagnosing
/// invalid entries and calling createOffloadEntry for the valid ones.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated for functions that contain target regions and for
  // device global variable entries.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per entry, indexed by the entry's order: (entry, source location
  // used for diagnostics, parent function name or mangled variable name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name per order index; only filled for target regions.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by searching the source
        // manager's files for one with a matching (device, file) unique ID.
        // Loc stays default-constructed if no file matches.
        // NOTE: inside this for statement the loop-local 'E' (the end
        // iterator) shadows the lambda parameter 'E'; the uses of 'E' after
        // the loop refer to the parameter again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // No source location is available for global variables.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second pass: create the offload entry globals in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are registered with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Each case either 'continue's (no entry is created) or 'break's to
      // reach the createOffloadEntry call after the switch.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry on the device when unified shared memory is required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          // Unlike the 'to' case above, this diagnostic carries neither a
          // source location nor the variable name.
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3271 
3272 /// Loads all the offload entries information from the host IR
3273 /// metadata.
3274 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3275   // If we are in target mode, load the metadata from the host IR. This code has
3276   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3277 
3278   if (!CGM.getLangOpts().OpenMPIsDevice)
3279     return;
3280 
3281   if (CGM.getLangOpts().OMPHostIRFile.empty())
3282     return;
3283 
3284   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3285   if (auto EC = Buf.getError()) {
3286     CGM.getDiags().Report(diag::err_cannot_open_file)
3287         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3288     return;
3289   }
3290 
3291   llvm::LLVMContext C;
3292   auto ME = expectedToErrorOrAndEmitErrors(
3293       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3294 
3295   if (auto EC = ME.getError()) {
3296     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3297         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3298     CGM.getDiags().Report(DiagID)
3299         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3300     return;
3301   }
3302 
3303   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3304   if (!MD)
3305     return;
3306 
3307   for (llvm::MDNode *MN : MD->operands()) {
3308     auto &&GetMDInt = [MN](unsigned Idx) {
3309       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3310       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3311     };
3312 
3313     auto &&GetMDString = [MN](unsigned Idx) {
3314       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3315       return V->getString();
3316     };
3317 
3318     switch (GetMDInt(0)) {
3319     default:
3320       llvm_unreachable("Unexpected metadata!");
3321       break;
3322     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3323         OffloadingEntryInfoTargetRegion:
3324       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3325           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3326           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3327           /*Order=*/GetMDInt(5));
3328       break;
3329     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3330         OffloadingEntryInfoDeviceGlobalVar:
3331       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3332           /*MangledName=*/GetMDString(1),
3333           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3334               /*Flags=*/GetMDInt(2)),
3335           /*Order=*/GetMDInt(3));
3336       break;
3337     }
3338   }
3339 }
3340 
3341 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3342   if (!KmpRoutineEntryPtrTy) {
3343     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3344     ASTContext &C = CGM.getContext();
3345     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3346     FunctionProtoType::ExtProtoInfo EPI;
3347     KmpRoutineEntryPtrQTy = C.getPointerType(
3348         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3349     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3350   }
3351 }
3352 
3353 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3354   // Make sure the type of the entry is already created. This is the type we
3355   // have to create:
3356   // struct __tgt_offload_entry{
3357   //   void      *addr;       // Pointer to the offload entry info.
3358   //                          // (function or global)
3359   //   char      *name;       // Name of the function or global.
3360   //   size_t     size;       // Size of the entry info (0 if it a function).
3361   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3362   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3363   // };
3364   if (TgtOffloadEntryQTy.isNull()) {
3365     ASTContext &C = CGM.getContext();
3366     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3367     RD->startDefinition();
3368     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3369     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3370     addFieldToRecordDecl(C, RD, C.getSizeType());
3371     addFieldToRecordDecl(
3372         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3373     addFieldToRecordDecl(
3374         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3375     RD->completeDefinition();
3376     RD->addAttr(PackedAttr::CreateImplicit(C));
3377     TgtOffloadEntryQTy = C.getRecordType(RD);
3378   }
3379   return TgtOffloadEntryQTy;
3380 }
3381 
3382 namespace {
3383 struct PrivateHelpersTy {
3384   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3385                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3386       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3387         PrivateElemInit(PrivateElemInit) {}
3388   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3389   const Expr *OriginalRef = nullptr;
3390   const VarDecl *Original = nullptr;
3391   const VarDecl *PrivateCopy = nullptr;
3392   const VarDecl *PrivateElemInit = nullptr;
3393   bool isLocalPrivate() const {
3394     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3395   }
3396 };
3397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3398 } // anonymous namespace
3399 
3400 static bool isAllocatableDecl(const VarDecl *VD) {
3401   const VarDecl *CVD = VD->getCanonicalDecl();
3402   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3403     return false;
3404   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3405   // Use the default allocation.
3406   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3407             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3408            !AA->getAllocator());
3409 }
3410 
3411 static RecordDecl *
3412 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3413   if (!Privates.empty()) {
3414     ASTContext &C = CGM.getContext();
3415     // Build struct .kmp_privates_t. {
3416     //         /*  private vars  */
3417     //       };
3418     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3419     RD->startDefinition();
3420     for (const auto &Pair : Privates) {
3421       const VarDecl *VD = Pair.second.Original;
3422       QualType Type = VD->getType().getNonReferenceType();
3423       // If the private variable is a local variable with lvalue ref type,
3424       // allocate the pointer instead of the pointee type.
3425       if (Pair.second.isLocalPrivate()) {
3426         if (VD->getType()->isLValueReferenceType())
3427           Type = C.getPointerType(Type);
3428         if (isAllocatableDecl(VD))
3429           Type = C.getPointerType(Type);
3430       }
3431       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3432       if (VD->hasAttrs()) {
3433         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3434              E(VD->getAttrs().end());
3435              I != E; ++I)
3436           FD->addAttr(*I);
3437       }
3438     }
3439     RD->completeDefinition();
3440     return RD;
3441   }
3442   return nullptr;
3443 }
3444 
3445 static RecordDecl *
3446 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3447                          QualType KmpInt32Ty,
3448                          QualType KmpRoutineEntryPointerQTy) {
3449   ASTContext &C = CGM.getContext();
3450   // Build struct kmp_task_t {
3451   //         void *              shareds;
3452   //         kmp_routine_entry_t routine;
3453   //         kmp_int32           part_id;
3454   //         kmp_cmplrdata_t data1;
3455   //         kmp_cmplrdata_t data2;
3456   // For taskloops additional fields:
3457   //         kmp_uint64          lb;
3458   //         kmp_uint64          ub;
3459   //         kmp_int64           st;
3460   //         kmp_int32           liter;
3461   //         void *              reductions;
3462   //       };
3463   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3464   UD->startDefinition();
3465   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3466   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3467   UD->completeDefinition();
3468   QualType KmpCmplrdataTy = C.getRecordType(UD);
3469   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3470   RD->startDefinition();
3471   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3472   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3473   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3474   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3475   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3476   if (isOpenMPTaskLoopDirective(Kind)) {
3477     QualType KmpUInt64Ty =
3478         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3479     QualType KmpInt64Ty =
3480         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3481     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3482     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3483     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3484     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3485     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3486   }
3487   RD->completeDefinition();
3488   return RD;
3489 }
3490 
3491 static RecordDecl *
3492 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3493                                      ArrayRef<PrivateDataTy> Privates) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t_with_privates {
3496   //         kmp_task_t task_data;
3497   //         .kmp_privates_t. privates;
3498   //       };
3499   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3500   RD->startDefinition();
3501   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3502   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3503     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3504   RD->completeDefinition();
3505   return RD;
3506 }
3507 
3508 /// Emit a proxy function which accepts kmp_task_t as the second
3509 /// argument.
3510 /// \code
3511 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3512 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3513 ///   For taskloops:
3514 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3515 ///   tt->reductions, tt->shareds);
3516 ///   return 0;
3517 /// }
3518 /// \endcode
3519 static llvm::Function *
3520 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3521                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3522                       QualType KmpTaskTWithPrivatesPtrQTy,
3523                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3524                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3525                       llvm::Value *TaskPrivatesMap) {
3526   ASTContext &C = CGM.getContext();
3527   FunctionArgList Args;
3528   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3529                             ImplicitParamDecl::Other);
3530   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3531                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3532                                 ImplicitParamDecl::Other);
3533   Args.push_back(&GtidArg);
3534   Args.push_back(&TaskTypeArg);
3535   const auto &TaskEntryFnInfo =
3536       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3537   llvm::FunctionType *TaskEntryTy =
3538       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3539   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3540   auto *TaskEntry = llvm::Function::Create(
3541       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3542   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3543   TaskEntry->setDoesNotRecurse();
3544   CodeGenFunction CGF(CGM);
3545   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3546                     Loc, Loc);
3547 
3548   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3549   // tt,
3550   // For taskloops:
3551   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3552   // tt->task_data.shareds);
3553   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3554       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3555   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3556       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3557       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3558   const auto *KmpTaskTWithPrivatesQTyRD =
3559       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3560   LValue Base =
3561       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3562   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3563   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3564   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3565   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3566 
3567   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3568   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3569   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3570       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3571       CGF.ConvertTypeForMem(SharedsPtrTy));
3572 
3573   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3574   llvm::Value *PrivatesParam;
3575   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3576     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3577     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3578         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3579   } else {
3580     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3581   }
3582 
3583   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3584                                TaskPrivatesMap,
3585                                CGF.Builder
3586                                    .CreatePointerBitCastOrAddrSpaceCast(
3587                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3588                                    .getPointer()};
3589   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3590                                           std::end(CommonArgs));
3591   if (isOpenMPTaskLoopDirective(Kind)) {
3592     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3593     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3594     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3595     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3596     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3597     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3598     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3599     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3600     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3601     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3602     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3603     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3604     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3605     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3606     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3607     CallArgs.push_back(LBParam);
3608     CallArgs.push_back(UBParam);
3609     CallArgs.push_back(StParam);
3610     CallArgs.push_back(LIParam);
3611     CallArgs.push_back(RParam);
3612   }
3613   CallArgs.push_back(SharedsParam);
3614 
3615   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3616                                                   CallArgs);
3617   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3618                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3619   CGF.FinishFunction();
3620   return TaskEntry;
3621 }
3622 
3623 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3624                                             SourceLocation Loc,
3625                                             QualType KmpInt32Ty,
3626                                             QualType KmpTaskTWithPrivatesPtrQTy,
3627                                             QualType KmpTaskTWithPrivatesQTy) {
3628   ASTContext &C = CGM.getContext();
3629   FunctionArgList Args;
3630   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3631                             ImplicitParamDecl::Other);
3632   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3633                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3634                                 ImplicitParamDecl::Other);
3635   Args.push_back(&GtidArg);
3636   Args.push_back(&TaskTypeArg);
3637   const auto &DestructorFnInfo =
3638       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3639   llvm::FunctionType *DestructorFnTy =
3640       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3641   std::string Name =
3642       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3643   auto *DestructorFn =
3644       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3645                              Name, &CGM.getModule());
3646   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3647                                     DestructorFnInfo);
3648   DestructorFn->setDoesNotRecurse();
3649   CodeGenFunction CGF(CGM);
3650   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3651                     Args, Loc, Loc);
3652 
3653   LValue Base = CGF.EmitLoadOfPointerLValue(
3654       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3655       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3656   const auto *KmpTaskTWithPrivatesQTyRD =
3657       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3658   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3659   Base = CGF.EmitLValueForField(Base, *FI);
3660   for (const auto *Field :
3661        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3662     if (QualType::DestructionKind DtorKind =
3663             Field->getType().isDestructedType()) {
3664       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3665       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3666     }
3667   }
3668   CGF.FinishFunction();
3669   return DestructorFn;
3670 }
3671 
3672 /// Emit a privates mapping function for correct handling of private and
3673 /// firstprivate variables.
3674 /// \code
3675 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3676 /// **noalias priv1,...,  <tyn> **noalias privn) {
3677 ///   *priv1 = &.privates.priv1;
3678 ///   ...;
3679 ///   *privn = &.privates.privn;
3680 /// }
3681 /// \endcode
3682 static llvm::Value *
3683 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3684                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3685                                ArrayRef<PrivateDataTy> Privates) {
3686   ASTContext &C = CGM.getContext();
3687   FunctionArgList Args;
3688   ImplicitParamDecl TaskPrivatesArg(
3689       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3690       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3691       ImplicitParamDecl::Other);
3692   Args.push_back(&TaskPrivatesArg);
3693   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3694   unsigned Counter = 1;
3695   for (const Expr *E : Data.PrivateVars) {
3696     Args.push_back(ImplicitParamDecl::Create(
3697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3698         C.getPointerType(C.getPointerType(E->getType()))
3699             .withConst()
3700             .withRestrict(),
3701         ImplicitParamDecl::Other));
3702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3703     PrivateVarsPos[VD] = Counter;
3704     ++Counter;
3705   }
3706   for (const Expr *E : Data.FirstprivateVars) {
3707     Args.push_back(ImplicitParamDecl::Create(
3708         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3709         C.getPointerType(C.getPointerType(E->getType()))
3710             .withConst()
3711             .withRestrict(),
3712         ImplicitParamDecl::Other));
3713     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   for (const Expr *E : Data.LastprivateVars) {
3718     Args.push_back(ImplicitParamDecl::Create(
3719         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3720         C.getPointerType(C.getPointerType(E->getType()))
3721             .withConst()
3722             .withRestrict(),
3723         ImplicitParamDecl::Other));
3724     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3725     PrivateVarsPos[VD] = Counter;
3726     ++Counter;
3727   }
3728   for (const VarDecl *VD : Data.PrivateLocals) {
3729     QualType Ty = VD->getType().getNonReferenceType();
3730     if (VD->getType()->isLValueReferenceType())
3731       Ty = C.getPointerType(Ty);
3732     if (isAllocatableDecl(VD))
3733       Ty = C.getPointerType(Ty);
3734     Args.push_back(ImplicitParamDecl::Create(
3735         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3736         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3737         ImplicitParamDecl::Other));
3738     PrivateVarsPos[VD] = Counter;
3739     ++Counter;
3740   }
3741   const auto &TaskPrivatesMapFnInfo =
3742       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3743   llvm::FunctionType *TaskPrivatesMapTy =
3744       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3745   std::string Name =
3746       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3747   auto *TaskPrivatesMap = llvm::Function::Create(
3748       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3749       &CGM.getModule());
3750   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3751                                     TaskPrivatesMapFnInfo);
3752   if (CGM.getLangOpts().Optimize) {
3753     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3754     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3755     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3756   }
3757   CodeGenFunction CGF(CGM);
3758   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3759                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3760 
3761   // *privi = &.privates.privi;
3762   LValue Base = CGF.EmitLoadOfPointerLValue(
3763       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3764       TaskPrivatesArg.getType()->castAs<PointerType>());
3765   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3766   Counter = 0;
3767   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3768     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3769     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3770     LValue RefLVal =
3771         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3772     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3773         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3774     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3775     ++Counter;
3776   }
3777   CGF.FinishFunction();
3778   return TaskPrivatesMap;
3779 }
3780 
3781 /// Emit initialization for private variables in task-based directives.
3782 static void emitPrivatesInit(CodeGenFunction &CGF,
3783                              const OMPExecutableDirective &D,
3784                              Address KmpTaskSharedsPtr, LValue TDBase,
3785                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3786                              QualType SharedsTy, QualType SharedsPtrTy,
3787                              const OMPTaskDataTy &Data,
3788                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3789   ASTContext &C = CGF.getContext();
3790   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3791   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3792   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3793                                  ? OMPD_taskloop
3794                                  : OMPD_task;
3795   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3796   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3797   LValue SrcBase;
3798   bool IsTargetTask =
3799       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3800       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3801   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3802   // PointersArray, SizesArray, and MappersArray. The original variables for
3803   // these arrays are not captured and we get their addresses explicitly.
3804   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3805       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3806     SrcBase = CGF.MakeAddrLValue(
3807         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3808             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3809         SharedsTy);
3810   }
3811   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3812   for (const PrivateDataTy &Pair : Privates) {
3813     // Do not initialize private locals.
3814     if (Pair.second.isLocalPrivate()) {
3815       ++FI;
3816       continue;
3817     }
3818     const VarDecl *VD = Pair.second.PrivateCopy;
3819     const Expr *Init = VD->getAnyInitializer();
3820     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3821                              !CGF.isTrivialInitializer(Init)))) {
3822       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3823       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3824         const VarDecl *OriginalVD = Pair.second.Original;
3825         // Check if the variable is the target-based BasePointersArray,
3826         // PointersArray, SizesArray, or MappersArray.
3827         LValue SharedRefLValue;
3828         QualType Type = PrivateLValue.getType();
3829         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3830         if (IsTargetTask && !SharedField) {
3831           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3832                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3833                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3834                          ->getNumParams() == 0 &&
3835                  isa<TranslationUnitDecl>(
3836                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3837                          ->getDeclContext()) &&
3838                  "Expected artificial target data variable.");
3839           SharedRefLValue =
3840               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3841         } else if (ForDup) {
3842           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3843           SharedRefLValue = CGF.MakeAddrLValue(
3844               Address(SharedRefLValue.getPointer(CGF),
3845                       C.getDeclAlign(OriginalVD)),
3846               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3847               SharedRefLValue.getTBAAInfo());
3848         } else if (CGF.LambdaCaptureFields.count(
3849                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3850                    dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3851           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3852         } else {
3853           // Processing for implicitly captured variables.
3854           InlinedOpenMPRegionRAII Region(
3855               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3856               /*HasCancel=*/false);
3857           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3858         }
3859         if (Type->isArrayType()) {
3860           // Initialize firstprivate array.
3861           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3862             // Perform simple memcpy.
3863             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3864           } else {
3865             // Initialize firstprivate array using element-by-element
3866             // initialization.
3867             CGF.EmitOMPAggregateAssign(
3868                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3869                 Type,
3870                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3871                                                   Address SrcElement) {
3872                   // Clean up any temporaries needed by the initialization.
3873                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3874                   InitScope.addPrivate(
3875                       Elem, [SrcElement]() -> Address { return SrcElement; });
3876                   (void)InitScope.Privatize();
3877                   // Emit initialization for single element.
3878                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3879                       CGF, &CapturesInfo);
3880                   CGF.EmitAnyExprToMem(Init, DestElement,
3881                                        Init->getType().getQualifiers(),
3882                                        /*IsInitializer=*/false);
3883                 });
3884           }
3885         } else {
3886           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3887           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3888             return SharedRefLValue.getAddress(CGF);
3889           });
3890           (void)InitScope.Privatize();
3891           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3892           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3893                              /*capturedByInit=*/false);
3894         }
3895       } else {
3896         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3897       }
3898     }
3899     ++FI;
3900   }
3901 }
3902 
3903 /// Check if duplication function is required for taskloops.
3904 static bool checkInitIsRequired(CodeGenFunction &CGF,
3905                                 ArrayRef<PrivateDataTy> Privates) {
3906   bool InitRequired = false;
3907   for (const PrivateDataTy &Pair : Privates) {
3908     if (Pair.second.isLocalPrivate())
3909       continue;
3910     const VarDecl *VD = Pair.second.PrivateCopy;
3911     const Expr *Init = VD->getAnyInitializer();
3912     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3913                                     !CGF.isTrivialInitializer(Init));
3914     if (InitRequired)
3915       break;
3916   }
3917   return InitRequired;
3918 }
3919 
3920 
3921 /// Emit task_dup function (for initialization of
3922 /// private/firstprivate/lastprivate vars and last_iter flag)
3923 /// \code
3924 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3925 /// lastpriv) {
3926 /// // setup lastprivate flag
3927 ///    task_dst->last = lastpriv;
3928 /// // could be constructor calls here...
3929 /// }
3930 /// \endcode
3931 static llvm::Value *
3932 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3933                     const OMPExecutableDirective &D,
3934                     QualType KmpTaskTWithPrivatesPtrQTy,
3935                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3936                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3937                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3938                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3939   ASTContext &C = CGM.getContext();
3940   FunctionArgList Args;
3941   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3942                            KmpTaskTWithPrivatesPtrQTy,
3943                            ImplicitParamDecl::Other);
3944   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3945                            KmpTaskTWithPrivatesPtrQTy,
3946                            ImplicitParamDecl::Other);
3947   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3948                                 ImplicitParamDecl::Other);
3949   Args.push_back(&DstArg);
3950   Args.push_back(&SrcArg);
3951   Args.push_back(&LastprivArg);
3952   const auto &TaskDupFnInfo =
3953       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3954   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3955   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3956   auto *TaskDup = llvm::Function::Create(
3957       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3958   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3959   TaskDup->setDoesNotRecurse();
3960   CodeGenFunction CGF(CGM);
3961   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3962                     Loc);
3963 
3964   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3965       CGF.GetAddrOfLocalVar(&DstArg),
3966       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3967   // task_dst->liter = lastpriv;
3968   if (WithLastIter) {
3969     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3970     LValue Base = CGF.EmitLValueForField(
3971         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3972     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3973     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3974         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3975     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3976   }
3977 
3978   // Emit initial values for private copies (if any).
3979   assert(!Privates.empty());
3980   Address KmpTaskSharedsPtr = Address::invalid();
3981   if (!Data.FirstprivateVars.empty()) {
3982     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3983         CGF.GetAddrOfLocalVar(&SrcArg),
3984         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3985     LValue Base = CGF.EmitLValueForField(
3986         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3987     KmpTaskSharedsPtr = Address(
3988         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3989                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3990                                                   KmpTaskTShareds)),
3991                              Loc),
3992         CGM.getNaturalTypeAlignment(SharedsTy));
3993   }
3994   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3995                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3996   CGF.FinishFunction();
3997   return TaskDup;
3998 }
3999 
4000 /// Checks if destructor function is required to be generated.
4001 /// \return true if cleanups are required, false otherwise.
4002 static bool
4003 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4004                          ArrayRef<PrivateDataTy> Privates) {
4005   for (const PrivateDataTy &P : Privates) {
4006     if (P.second.isLocalPrivate())
4007       continue;
4008     QualType Ty = P.second.Original->getType().getNonReferenceType();
4009     if (Ty.isDestructedType())
4010       return true;
4011   }
4012   return false;
4013 }
4014 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the expression, each one
/// nested in the body of the previous one, and leaves the insertion point in
/// the innermost body. The destructor closes the loops in reverse order by
/// emitting the counter update, the branch back to the loop header and the
/// exit block. When no iterator expression is given, the bodies of both the
/// constructor and the destructor return immediately.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression the loops are generated for; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Loop header ("iter.cont") destinations, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Loop exit ("iter.exit") destinations, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \p E (if any).
  /// \param CGF Function the loops are emitted into.
  /// \param E Iterator expression, or nullptr to emit nothing.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // The upper bounds are evaluated up front, before the iterator variables
    // and counters are privatized.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Private storage for the iterator variable itself.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // Private storage for the helper counter of this iterator.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open the loops, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember the header and exit destinations; the destructor needs them
      // to close the loop.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost loop (I == 1) is emitted as
      // finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4093 
4094 static std::pair<llvm::Value *, llvm::Value *>
4095 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4096   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4097   llvm::Value *Addr;
4098   if (OASE) {
4099     const Expr *Base = OASE->getBase();
4100     Addr = CGF.EmitScalarExpr(Base);
4101   } else {
4102     Addr = CGF.EmitLValue(E).getPointer(CGF);
4103   }
4104   llvm::Value *SizeVal;
4105   QualType Ty = E->getType();
4106   if (OASE) {
4107     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4108     for (const Expr *SE : OASE->getDimensions()) {
4109       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4110       Sz = CGF.EmitScalarConversion(
4111           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4112       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4113     }
4114   } else if (const auto *ASE =
4115                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4116     LValue UpAddrLVal =
4117         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4118     llvm::Value *UpAddr =
4119         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4120     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4121     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4122     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4123   } else {
4124     SizeVal = CGF.getTypeSize(Ty);
4125   }
4126   return std::make_pair(Addr, SizeVal);
4127 }
4128 
4129 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4130 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4131   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4132   if (KmpTaskAffinityInfoTy.isNull()) {
4133     RecordDecl *KmpAffinityInfoRD =
4134         C.buildImplicitRecord("kmp_task_affinity_info_t");
4135     KmpAffinityInfoRD->startDefinition();
4136     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4137     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4138     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4139     KmpAffinityInfoRD->completeDefinition();
4140     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4141   }
4142 }
4143 
// Emits the code that allocates and initializes a task descriptor for the
// directive D and returns the pieces callers need to continue with it.
// The steps, in emission order:
//  1. Collect private, firstprivate, lastprivate and local private variables
//     and sort them by decreasing alignment.
//  2. Build (or reuse) the kmp_task_t record type and the per-task record
//     that also holds the privates.
//  3. Emit the privates mapping function (if there are privates) and the
//     proxy task entry function.
//  4. Compute the task flags and call __kmpc_omp_task_alloc, or
//     __kmpc_omp_target_task_alloc when the directive has a nowait clause.
//  5. Handle the detach clause and the affinity clauses.
//  6. Copy the shareds, initialize the privates and, for taskloop
//     directives, emit the task duplication function when required.
//  7. Store the destructors function and the priority into the descriptor.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the variable used for element-wise
  // initialization.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the collection order of
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached
  // record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The type of the privates map is taken from the fourth parameter of the
  // outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record is the one passed to the
    // mapping function.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are collected in Flags; the final flag may
  // depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If a value is attached to Data.Final, the final flag is selected at
  // runtime; otherwise the attached integer decides it.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target variant of the allocation function is
  // called; it takes the device ID as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this Loc shadows the SourceLocation parameter within the block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements is the runtime part (clauses with an iterator modifier),
    // NumAffinities the compile-time part (clauses without one).
    // NOTE(review): the upper bounds of all iterators of all clauses are
    // multiplied into one product and the clause's varlist_size() is not
    // factored in - confirm this is sufficient for clauses with several list
    // items or for several clauses with iterators.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // The element count is only known at runtime: emit a variable-length
      // array whose size expression is bound to the computed value.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The count is passed to the runtime call below as a 32-bit value.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // All counts are compile-time constants: a fixed-size temporary is
      // enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // NOTE(review): only the BaseAddr and Len fields of each entry are
    // written in this function; the Flags field is left as is - confirm
    // this is intended.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops are placed after the constant
    // ones; their position is tracked in a runtime counter that starts at
    // Pos.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope opens the iterator loops here and closes them when it is
      // destroyed at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; its first field is the
  // kmp_task_t part (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the pointer loaded from the shareds field of the
    // task descriptor.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The priority goes into the Data2 field, using the union declaration
    // obtained from the Data1 field above.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4533 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  /// Used for 'in' dependences.
  DepIn = 0x1,
  /// Used for both 'out' and 'inout' dependences.
  DepInOut = 0x3,
  /// Used for 'mutexinoutset' dependences.
  DepMutexInOutSet = 0x4,
};
/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4544 
4545 /// Translates internal dependency kind into the runtime kind.
4546 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4547   RTLDependenceKindTy DepKind;
4548   switch (K) {
4549   case OMPC_DEPEND_in:
4550     DepKind = DepIn;
4551     break;
4552   // Out and InOut dependencies must use the same code.
4553   case OMPC_DEPEND_out:
4554   case OMPC_DEPEND_inout:
4555     DepKind = DepInOut;
4556     break;
4557   case OMPC_DEPEND_mutexinoutset:
4558     DepKind = DepMutexInOutSet;
4559     break;
4560   case OMPC_DEPEND_source:
4561   case OMPC_DEPEND_sink:
4562   case OMPC_DEPEND_depobj:
4563   case OMPC_DEPEND_unknown:
4564     llvm_unreachable("Unknown task dependence type");
4565   }
4566   return DepKind;
4567 }
4568 
4569 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4570 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4571                            QualType &FlagsTy) {
4572   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4573   if (KmpDependInfoTy.isNull()) {
4574     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4575     KmpDependInfoRD->startDefinition();
4576     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4577     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4578     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4579     KmpDependInfoRD->completeDefinition();
4580     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4581   }
4582 }
4583 
4584 std::pair<llvm::Value *, LValue>
4585 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4586                                    SourceLocation Loc) {
4587   ASTContext &C = CGM.getContext();
4588   QualType FlagsTy;
4589   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4590   RecordDecl *KmpDependInfoRD =
4591       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4592   LValue Base = CGF.EmitLoadOfPointerLValue(
4593       DepobjLVal.getAddress(CGF),
4594       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4595   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4596   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4597           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4598   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4599                             Base.getTBAAInfo());
4600   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4601       Addr.getPointer(),
4602       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4603   LValue NumDepsBase = CGF.MakeAddrLValue(
4604       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4605       Base.getBaseInfo(), Base.getTBAAInfo());
4606   // NumDeps = deps[i].base_addr;
4607   LValue BaseAddrLVal = CGF.EmitLValueForField(
4608       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4609   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4610   return std::make_pair(NumDeps, Base);
4611 }
4612 
4613 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4614                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4615                            const OMPTaskDataTy::DependData &Data,
4616                            Address DependenciesArray) {
4617   CodeGenModule &CGM = CGF.CGM;
4618   ASTContext &C = CGM.getContext();
4619   QualType FlagsTy;
4620   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4621   RecordDecl *KmpDependInfoRD =
4622       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4623   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4624 
4625   OMPIteratorGeneratorScope IteratorScope(
4626       CGF, cast_or_null<OMPIteratorExpr>(
4627                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4628                                  : nullptr));
4629   for (const Expr *E : Data.DepExprs) {
4630     llvm::Value *Addr;
4631     llvm::Value *Size;
4632     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4633     LValue Base;
4634     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4635       Base = CGF.MakeAddrLValue(
4636           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4637     } else {
4638       LValue &PosLVal = *Pos.get<LValue *>();
4639       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4640       Base = CGF.MakeAddrLValue(
4641           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4642                   DependenciesArray.getAlignment()),
4643           KmpDependInfoTy);
4644     }
4645     // deps[i].base_addr = &<Dependencies[i].second>;
4646     LValue BaseAddrLVal = CGF.EmitLValueForField(
4647         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4648     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4649                           BaseAddrLVal);
4650     // deps[i].len = sizeof(<Dependencies[i].second>);
4651     LValue LenLVal = CGF.EmitLValueForField(
4652         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4653     CGF.EmitStoreOfScalar(Size, LenLVal);
4654     // deps[i].flags = <Dependencies[i].first>;
4655     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4656     LValue FlagsLVal = CGF.EmitLValueForField(
4657         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4658     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4659                           FlagsLVal);
4660     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4661       ++(*P);
4662     } else {
4663       LValue &PosLVal = *Pos.get<LValue *>();
4664       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4665       Idx = CGF.Builder.CreateNUWAdd(Idx,
4666                                      llvm::ConstantInt::get(Idx->getType(), 1));
4667       CGF.EmitStoreOfScalar(Idx, PosLVal);
4668     }
4669   }
4670 }
4671 
4672 static SmallVector<llvm::Value *, 4>
4673 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4674                         const OMPTaskDataTy::DependData &Data) {
4675   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4676          "Expected depobj dependecy kind.");
4677   SmallVector<llvm::Value *, 4> Sizes;
4678   SmallVector<LValue, 4> SizeLVals;
4679   ASTContext &C = CGF.getContext();
4680   QualType FlagsTy;
4681   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4682   RecordDecl *KmpDependInfoRD =
4683       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4684   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4685   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4686   {
4687     OMPIteratorGeneratorScope IteratorScope(
4688         CGF, cast_or_null<OMPIteratorExpr>(
4689                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4690                                    : nullptr));
4691     for (const Expr *E : Data.DepExprs) {
4692       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4693       LValue Base = CGF.EmitLoadOfPointerLValue(
4694           DepobjLVal.getAddress(CGF),
4695           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4696       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4697           Base.getAddress(CGF), KmpDependInfoPtrT);
4698       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4699                                 Base.getTBAAInfo());
4700       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4701           Addr.getPointer(),
4702           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4703       LValue NumDepsBase = CGF.MakeAddrLValue(
4704           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4705           Base.getBaseInfo(), Base.getTBAAInfo());
4706       // NumDeps = deps[i].base_addr;
4707       LValue BaseAddrLVal = CGF.EmitLValueForField(
4708           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4709       llvm::Value *NumDeps =
4710           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4711       LValue NumLVal = CGF.MakeAddrLValue(
4712           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4713           C.getUIntPtrType());
4714       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4715                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4716       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4717       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4718       CGF.EmitStoreOfScalar(Add, NumLVal);
4719       SizeLVals.push_back(NumLVal);
4720     }
4721   }
4722   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4723     llvm::Value *Size =
4724         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4725     Sizes.push_back(Size);
4726   }
4727   return Sizes;
4728 }
4729 
4730 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4731                                LValue PosLVal,
4732                                const OMPTaskDataTy::DependData &Data,
4733                                Address DependenciesArray) {
4734   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4735          "Expected depobj dependecy kind.");
4736   ASTContext &C = CGF.getContext();
4737   QualType FlagsTy;
4738   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4739   RecordDecl *KmpDependInfoRD =
4740       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4741   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4742   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4743   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4744   {
4745     OMPIteratorGeneratorScope IteratorScope(
4746         CGF, cast_or_null<OMPIteratorExpr>(
4747                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4748                                    : nullptr));
4749     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4750       const Expr *E = Data.DepExprs[I];
4751       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4752       LValue Base = CGF.EmitLoadOfPointerLValue(
4753           DepobjLVal.getAddress(CGF),
4754           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4755       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4756           Base.getAddress(CGF), KmpDependInfoPtrT);
4757       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4758                                 Base.getTBAAInfo());
4759 
4760       // Get number of elements in a single depobj.
4761       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4762           Addr.getPointer(),
4763           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4764       LValue NumDepsBase = CGF.MakeAddrLValue(
4765           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4766           Base.getBaseInfo(), Base.getTBAAInfo());
4767       // NumDeps = deps[i].base_addr;
4768       LValue BaseAddrLVal = CGF.EmitLValueForField(
4769           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4770       llvm::Value *NumDeps =
4771           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4772 
4773       // memcopy dependency data.
4774       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4775           ElSize,
4776           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4777       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4778       Address DepAddr =
4779           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4780                   DependenciesArray.getAlignment());
4781       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4782 
4783       // Increase pos.
4784       // pos += size;
4785       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4786       CGF.EmitStoreOfScalar(Add, PosLVal);
4787     }
4788   }
4789 }
4790 
4791 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4792     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4793     SourceLocation Loc) {
4794   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4795         return D.DepExprs.empty();
4796       }))
4797     return std::make_pair(nullptr, Address::invalid());
4798   // Process list of dependencies.
4799   ASTContext &C = CGM.getContext();
4800   Address DependenciesArray = Address::invalid();
4801   llvm::Value *NumOfElements = nullptr;
4802   unsigned NumDependencies = std::accumulate(
4803       Dependencies.begin(), Dependencies.end(), 0,
4804       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4805         return D.DepKind == OMPC_DEPEND_depobj
4806                    ? V
4807                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4808       });
4809   QualType FlagsTy;
4810   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4811   bool HasDepobjDeps = false;
4812   bool HasRegularWithIterators = false;
4813   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4814   llvm::Value *NumOfRegularWithIterators =
4815       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4816   // Calculate number of depobj dependecies and regular deps with the iterators.
4817   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4818     if (D.DepKind == OMPC_DEPEND_depobj) {
4819       SmallVector<llvm::Value *, 4> Sizes =
4820           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4821       for (llvm::Value *Size : Sizes) {
4822         NumOfDepobjElements =
4823             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4824       }
4825       HasDepobjDeps = true;
4826       continue;
4827     }
4828     // Include number of iterations, if any.
4829     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4830       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4831         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4832         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4833         NumOfRegularWithIterators =
4834             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4835       }
4836       HasRegularWithIterators = true;
4837       continue;
4838     }
4839   }
4840 
4841   QualType KmpDependInfoArrayTy;
4842   if (HasDepobjDeps || HasRegularWithIterators) {
4843     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4844                                            /*isSigned=*/false);
4845     if (HasDepobjDeps) {
4846       NumOfElements =
4847           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4848     }
4849     if (HasRegularWithIterators) {
4850       NumOfElements =
4851           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4852     }
4853     OpaqueValueExpr OVE(Loc,
4854                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4855                         VK_RValue);
4856     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4857                                                   RValue::get(NumOfElements));
4858     KmpDependInfoArrayTy =
4859         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4860                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4861     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4862     // Properly emit variable-sized array.
4863     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4864                                          ImplicitParamDecl::Other);
4865     CGF.EmitVarDecl(*PD);
4866     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4867     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4868                                               /*isSigned=*/false);
4869   } else {
4870     KmpDependInfoArrayTy = C.getConstantArrayType(
4871         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4872         ArrayType::Normal, /*IndexTypeQuals=*/0);
4873     DependenciesArray =
4874         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4875     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4876     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4877                                            /*isSigned=*/false);
4878   }
4879   unsigned Pos = 0;
4880   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4881     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4882         Dependencies[I].IteratorExpr)
4883       continue;
4884     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4885                    DependenciesArray);
4886   }
4887   // Copy regular dependecies with iterators.
4888   LValue PosLVal = CGF.MakeAddrLValue(
4889       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4890   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4891   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4892     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4893         !Dependencies[I].IteratorExpr)
4894       continue;
4895     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4896                    DependenciesArray);
4897   }
4898   // Copy final depobj arrays without iterators.
4899   if (HasDepobjDeps) {
4900     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4901       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4902         continue;
4903       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4904                          DependenciesArray);
4905     }
4906   }
4907   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4908       DependenciesArray, CGF.VoidPtrTy);
4909   return std::make_pair(NumOfElements, DependenciesArray);
4910 }
4911 
4912 Address CGOpenMPRuntime::emitDepobjDependClause(
4913     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4914     SourceLocation Loc) {
4915   if (Dependencies.DepExprs.empty())
4916     return Address::invalid();
4917   // Process list of dependencies.
4918   ASTContext &C = CGM.getContext();
4919   Address DependenciesArray = Address::invalid();
4920   unsigned NumDependencies = Dependencies.DepExprs.size();
4921   QualType FlagsTy;
4922   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4923   RecordDecl *KmpDependInfoRD =
4924       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4925 
4926   llvm::Value *Size;
4927   // Define type kmp_depend_info[<Dependencies.size()>];
4928   // For depobj reserve one extra element to store the number of elements.
4929   // It is required to handle depobj(x) update(in) construct.
4930   // kmp_depend_info[<Dependencies.size()>] deps;
4931   llvm::Value *NumDepsVal;
4932   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4933   if (const auto *IE =
4934           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4935     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4936     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4937       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4938       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4939       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4940     }
4941     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4942                                     NumDepsVal);
4943     CharUnits SizeInBytes =
4944         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4945     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4946     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4947     NumDepsVal =
4948         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4949   } else {
4950     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4951         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4952         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4953     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4954     Size = CGM.getSize(Sz.alignTo(Align));
4955     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4956   }
4957   // Need to allocate on the dynamic memory.
4958   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4959   // Use default allocator.
4960   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4961   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4962 
4963   llvm::Value *Addr =
4964       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4965                               CGM.getModule(), OMPRTL___kmpc_alloc),
4966                           Args, ".dep.arr.addr");
4967   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4968       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4969   DependenciesArray = Address(Addr, Align);
4970   // Write number of elements in the first element of array for depobj.
4971   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4972   // deps[i].base_addr = NumDependencies;
4973   LValue BaseAddrLVal = CGF.EmitLValueForField(
4974       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4975   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4976   llvm::PointerUnion<unsigned *, LValue *> Pos;
4977   unsigned Idx = 1;
4978   LValue PosLVal;
4979   if (Dependencies.IteratorExpr) {
4980     PosLVal = CGF.MakeAddrLValue(
4981         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4982         C.getSizeType());
4983     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4984                           /*IsInit=*/true);
4985     Pos = &PosLVal;
4986   } else {
4987     Pos = &Idx;
4988   }
4989   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4990   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4991       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4992   return DependenciesArray;
4993 }
4994 
4995 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4996                                         SourceLocation Loc) {
4997   ASTContext &C = CGM.getContext();
4998   QualType FlagsTy;
4999   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5000   LValue Base = CGF.EmitLoadOfPointerLValue(
5001       DepobjLVal.getAddress(CGF),
5002       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5003   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5004   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5005       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5006   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5007       Addr.getPointer(),
5008       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5009   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5010                                                                CGF.VoidPtrTy);
5011   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5012   // Use default allocator.
5013   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5014   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5015 
5016   // _kmpc_free(gtid, addr, nullptr);
5017   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5018                                 CGM.getModule(), OMPRTL___kmpc_free),
5019                             Args);
5020 }
5021 
5022 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5023                                        OpenMPDependClauseKind NewDepKind,
5024                                        SourceLocation Loc) {
5025   ASTContext &C = CGM.getContext();
5026   QualType FlagsTy;
5027   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5028   RecordDecl *KmpDependInfoRD =
5029       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5030   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5031   llvm::Value *NumDeps;
5032   LValue Base;
5033   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5034 
5035   Address Begin = Base.getAddress(CGF);
5036   // Cast from pointer to array type to pointer to single element.
5037   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5038   // The basic structure here is a while-do loop.
5039   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5040   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5041   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5042   CGF.EmitBlock(BodyBB);
5043   llvm::PHINode *ElementPHI =
5044       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5045   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5046   Begin = Address(ElementPHI, Begin.getAlignment());
5047   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5048                             Base.getTBAAInfo());
5049   // deps[i].flags = NewDepKind;
5050   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5051   LValue FlagsLVal = CGF.EmitLValueForField(
5052       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5053   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5054                         FlagsLVal);
5055 
5056   // Shift the address forward by one element.
5057   Address ElementNext =
5058       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5059   ElementPHI->addIncoming(ElementNext.getPointer(),
5060                           CGF.Builder.GetInsertBlock());
5061   llvm::Value *IsEmpty =
5062       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5063   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5064   // Done.
5065   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5066 }
5067 
5068 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5069                                    const OMPExecutableDirective &D,
5070                                    llvm::Function *TaskFunction,
5071                                    QualType SharedsTy, Address Shareds,
5072                                    const Expr *IfCond,
5073                                    const OMPTaskDataTy &Data) {
5074   if (!CGF.HaveInsertPoint())
5075     return;
5076 
5077   TaskResultTy Result =
5078       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5079   llvm::Value *NewTask = Result.NewTask;
5080   llvm::Function *TaskEntry = Result.TaskEntry;
5081   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5082   LValue TDBase = Result.TDBase;
5083   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5084   // Process list of dependences.
5085   Address DependenciesArray = Address::invalid();
5086   llvm::Value *NumOfElements;
5087   std::tie(NumOfElements, DependenciesArray) =
5088       emitDependClause(CGF, Data.Dependences, Loc);
5089 
5090   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5091   // libcall.
5092   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5093   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5094   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5095   // list is not empty
5096   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5097   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5098   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5099   llvm::Value *DepTaskArgs[7];
5100   if (!Data.Dependences.empty()) {
5101     DepTaskArgs[0] = UpLoc;
5102     DepTaskArgs[1] = ThreadID;
5103     DepTaskArgs[2] = NewTask;
5104     DepTaskArgs[3] = NumOfElements;
5105     DepTaskArgs[4] = DependenciesArray.getPointer();
5106     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5107     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5108   }
5109   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5110                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5111     if (!Data.Tied) {
5112       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5113       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5114       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5115     }
5116     if (!Data.Dependences.empty()) {
5117       CGF.EmitRuntimeCall(
5118           OMPBuilder.getOrCreateRuntimeFunction(
5119               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5120           DepTaskArgs);
5121     } else {
5122       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5123                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5124                           TaskArgs);
5125     }
5126     // Check if parent region is untied and build return for untied task;
5127     if (auto *Region =
5128             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5129       Region->emitUntiedSwitch(CGF);
5130   };
5131 
5132   llvm::Value *DepWaitTaskArgs[6];
5133   if (!Data.Dependences.empty()) {
5134     DepWaitTaskArgs[0] = UpLoc;
5135     DepWaitTaskArgs[1] = ThreadID;
5136     DepWaitTaskArgs[2] = NumOfElements;
5137     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5138     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5139     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5140   }
5141   auto &M = CGM.getModule();
5142   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5143                         TaskEntry, &Data, &DepWaitTaskArgs,
5144                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5145     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5146     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5147     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5148     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5149     // is specified.
5150     if (!Data.Dependences.empty())
5151       CGF.EmitRuntimeCall(
5152           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5153           DepWaitTaskArgs);
5154     // Call proxy_task_entry(gtid, new_task);
5155     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5156                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5157       Action.Enter(CGF);
5158       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5159       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5160                                                           OutlinedFnArgs);
5161     };
5162 
5163     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5164     // kmp_task_t *new_task);
5165     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5166     // kmp_task_t *new_task);
5167     RegionCodeGenTy RCG(CodeGen);
5168     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5169                               M, OMPRTL___kmpc_omp_task_begin_if0),
5170                           TaskArgs,
5171                           OMPBuilder.getOrCreateRuntimeFunction(
5172                               M, OMPRTL___kmpc_omp_task_complete_if0),
5173                           TaskArgs);
5174     RCG.setAction(Action);
5175     RCG(CGF);
5176   };
5177 
5178   if (IfCond) {
5179     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5180   } else {
5181     RegionCodeGenTy ThenRCG(ThenCodeGen);
5182     ThenRCG(CGF);
5183   }
5184 }
5185 
5186 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5187                                        const OMPLoopDirective &D,
5188                                        llvm::Function *TaskFunction,
5189                                        QualType SharedsTy, Address Shareds,
5190                                        const Expr *IfCond,
5191                                        const OMPTaskDataTy &Data) {
5192   if (!CGF.HaveInsertPoint())
5193     return;
5194   TaskResultTy Result =
5195       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5196   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5197   // libcall.
5198   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5199   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5200   // sched, kmp_uint64 grainsize, void *task_dup);
5201   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5202   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5203   llvm::Value *IfVal;
5204   if (IfCond) {
5205     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5206                                       /*isSigned=*/true);
5207   } else {
5208     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5209   }
5210 
5211   LValue LBLVal = CGF.EmitLValueForField(
5212       Result.TDBase,
5213       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5214   const auto *LBVar =
5215       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5216   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5217                        LBLVal.getQuals(),
5218                        /*IsInitializer=*/true);
5219   LValue UBLVal = CGF.EmitLValueForField(
5220       Result.TDBase,
5221       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5222   const auto *UBVar =
5223       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5224   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5225                        UBLVal.getQuals(),
5226                        /*IsInitializer=*/true);
5227   LValue StLVal = CGF.EmitLValueForField(
5228       Result.TDBase,
5229       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5230   const auto *StVar =
5231       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5232   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5233                        StLVal.getQuals(),
5234                        /*IsInitializer=*/true);
5235   // Store reductions address.
5236   LValue RedLVal = CGF.EmitLValueForField(
5237       Result.TDBase,
5238       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5239   if (Data.Reductions) {
5240     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5241   } else {
5242     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5243                                CGF.getContext().VoidPtrTy);
5244   }
5245   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5246   llvm::Value *TaskArgs[] = {
5247       UpLoc,
5248       ThreadID,
5249       Result.NewTask,
5250       IfVal,
5251       LBLVal.getPointer(CGF),
5252       UBLVal.getPointer(CGF),
5253       CGF.EmitLoadOfScalar(StLVal, Loc),
5254       llvm::ConstantInt::getSigned(
5255           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5256       llvm::ConstantInt::getSigned(
5257           CGF.IntTy, Data.Schedule.getPointer()
5258                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5259                          : NoSchedule),
5260       Data.Schedule.getPointer()
5261           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5262                                       /*isSigned=*/false)
5263           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5264       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5265                              Result.TaskDupFn, CGF.VoidPtrTy)
5266                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5267   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5268                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5269                       TaskArgs);
5270 }
5271 
5272 /// Emit reduction operation for each element of array (required for
5273 /// array sections) LHS op = RHS.
5274 /// \param Type Type of array.
5275 /// \param LHSVar Variable on the left side of the reduction operation
5276 /// (references element of array in original variable).
5277 /// \param RHSVar Variable on the right side of the reduction operation
5278 /// (references element of array in original variable).
5279 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5280 /// RHSVar.
5281 static void EmitOMPAggregateReduction(
5282     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5283     const VarDecl *RHSVar,
5284     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5285                                   const Expr *, const Expr *)> &RedOpGen,
5286     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5287     const Expr *UpExpr = nullptr) {
5288   // Perform element-by-element initialization.
5289   QualType ElementTy;
5290   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5291   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5292 
5293   // Drill down to the base element type on both arrays.
5294   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5295   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5296 
5297   llvm::Value *RHSBegin = RHSAddr.getPointer();
5298   llvm::Value *LHSBegin = LHSAddr.getPointer();
5299   // Cast from pointer to array type to pointer to single element.
5300   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5301   // The basic structure here is a while-do loop.
5302   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5303   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5304   llvm::Value *IsEmpty =
5305       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5306   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5307 
5308   // Enter the loop body, making that address the current address.
5309   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5310   CGF.EmitBlock(BodyBB);
5311 
5312   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5313 
5314   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5315       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5316   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5317   Address RHSElementCurrent =
5318       Address(RHSElementPHI,
5319               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5320 
5321   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5322       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5323   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5324   Address LHSElementCurrent =
5325       Address(LHSElementPHI,
5326               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5327 
5328   // Emit copy.
5329   CodeGenFunction::OMPPrivateScope Scope(CGF);
5330   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5331   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5332   Scope.Privatize();
5333   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5334   Scope.ForceCleanup();
5335 
5336   // Shift the address forward by one element.
5337   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5338       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5339   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5340       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5341   // Check whether we've reached the end.
5342   llvm::Value *Done =
5343       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5344   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5345   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5346   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5347 
5348   // Done.
5349   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5350 }
5351 
5352 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5353 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5354 /// UDR combiner function.
5355 static void emitReductionCombiner(CodeGenFunction &CGF,
5356                                   const Expr *ReductionOp) {
5357   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5358     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5359       if (const auto *DRE =
5360               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5361         if (const auto *DRD =
5362                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5363           std::pair<llvm::Function *, llvm::Function *> Reduction =
5364               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5365           RValue Func = RValue::get(Reduction.first);
5366           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5367           CGF.EmitIgnoredExpr(ReductionOp);
5368           return;
5369         }
5370   CGF.EmitIgnoredExpr(ReductionOp);
5371 }
5372 
5373 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5374     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5375     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5376     ArrayRef<const Expr *> ReductionOps) {
5377   ASTContext &C = CGM.getContext();
5378 
5379   // void reduction_func(void *LHSArg, void *RHSArg);
5380   FunctionArgList Args;
5381   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5382                            ImplicitParamDecl::Other);
5383   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5384                            ImplicitParamDecl::Other);
5385   Args.push_back(&LHSArg);
5386   Args.push_back(&RHSArg);
5387   const auto &CGFI =
5388       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5389   std::string Name = getName({"omp", "reduction", "reduction_func"});
5390   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5391                                     llvm::GlobalValue::InternalLinkage, Name,
5392                                     &CGM.getModule());
5393   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5394   Fn->setDoesNotRecurse();
5395   CodeGenFunction CGF(CGM);
5396   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5397 
5398   // Dst = (void*[n])(LHSArg);
5399   // Src = (void*[n])(RHSArg);
5400   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5401       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5402       ArgsType), CGF.getPointerAlign());
5403   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5404       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5405       ArgsType), CGF.getPointerAlign());
5406 
5407   //  ...
5408   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5409   //  ...
5410   CodeGenFunction::OMPPrivateScope Scope(CGF);
5411   auto IPriv = Privates.begin();
5412   unsigned Idx = 0;
5413   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5414     const auto *RHSVar =
5415         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5416     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5417       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5418     });
5419     const auto *LHSVar =
5420         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5421     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5422       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5423     });
5424     QualType PrivTy = (*IPriv)->getType();
5425     if (PrivTy->isVariablyModifiedType()) {
5426       // Get array size and emit VLA type.
5427       ++Idx;
5428       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5429       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5430       const VariableArrayType *VLA =
5431           CGF.getContext().getAsVariableArrayType(PrivTy);
5432       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5433       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5434           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5435       CGF.EmitVariablyModifiedType(PrivTy);
5436     }
5437   }
5438   Scope.Privatize();
5439   IPriv = Privates.begin();
5440   auto ILHS = LHSExprs.begin();
5441   auto IRHS = RHSExprs.begin();
5442   for (const Expr *E : ReductionOps) {
5443     if ((*IPriv)->getType()->isArrayType()) {
5444       // Emit reduction for array section.
5445       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5446       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5447       EmitOMPAggregateReduction(
5448           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5449           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5450             emitReductionCombiner(CGF, E);
5451           });
5452     } else {
5453       // Emit reduction for array subscript or single variable.
5454       emitReductionCombiner(CGF, E);
5455     }
5456     ++IPriv;
5457     ++ILHS;
5458     ++IRHS;
5459   }
5460   Scope.ForceCleanup();
5461   CGF.FinishFunction();
5462   return Fn;
5463 }
5464 
5465 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5466                                                   const Expr *ReductionOp,
5467                                                   const Expr *PrivateRef,
5468                                                   const DeclRefExpr *LHS,
5469                                                   const DeclRefExpr *RHS) {
5470   if (PrivateRef->getType()->isArrayType()) {
5471     // Emit reduction for array section.
5472     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5473     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5474     EmitOMPAggregateReduction(
5475         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5476         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5477           emitReductionCombiner(CGF, ReductionOp);
5478         });
5479   } else {
5480     // Emit reduction for array subscript or single variable.
5481     emitReductionCombiner(CGF, ReductionOp);
5482   }
5483 }
5484 
5485 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5486                                     ArrayRef<const Expr *> Privates,
5487                                     ArrayRef<const Expr *> LHSExprs,
5488                                     ArrayRef<const Expr *> RHSExprs,
5489                                     ArrayRef<const Expr *> ReductionOps,
5490                                     ReductionOptionsTy Options) {
5491   if (!CGF.HaveInsertPoint())
5492     return;
5493 
5494   bool WithNowait = Options.WithNowait;
5495   bool SimpleReduction = Options.SimpleReduction;
5496 
5497   // Next code should be emitted for reduction:
5498   //
5499   // static kmp_critical_name lock = { 0 };
5500   //
5501   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5502   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5503   //  ...
5504   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5505   //  *(Type<n>-1*)rhs[<n>-1]);
5506   // }
5507   //
5508   // ...
5509   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5510   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5511   // RedList, reduce_func, &<lock>)) {
5512   // case 1:
5513   //  ...
5514   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5515   //  ...
5516   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5517   // break;
5518   // case 2:
5519   //  ...
5520   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5521   //  ...
5522   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5523   // break;
5524   // default:;
5525   // }
5526   //
5527   // if SimpleReduction is true, only the next code is generated:
5528   //  ...
5529   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5530   //  ...
5531 
5532   ASTContext &C = CGM.getContext();
5533 
5534   if (SimpleReduction) {
5535     CodeGenFunction::RunCleanupsScope Scope(CGF);
5536     auto IPriv = Privates.begin();
5537     auto ILHS = LHSExprs.begin();
5538     auto IRHS = RHSExprs.begin();
5539     for (const Expr *E : ReductionOps) {
5540       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5541                                   cast<DeclRefExpr>(*IRHS));
5542       ++IPriv;
5543       ++ILHS;
5544       ++IRHS;
5545     }
5546     return;
5547   }
5548 
5549   // 1. Build a list of reduction variables.
5550   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5551   auto Size = RHSExprs.size();
5552   for (const Expr *E : Privates) {
5553     if (E->getType()->isVariablyModifiedType())
5554       // Reserve place for array size.
5555       ++Size;
5556   }
5557   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5558   QualType ReductionArrayTy =
5559       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5560                              /*IndexTypeQuals=*/0);
5561   Address ReductionList =
5562       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5563   auto IPriv = Privates.begin();
5564   unsigned Idx = 0;
5565   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5566     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5567     CGF.Builder.CreateStore(
5568         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5569             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5570         Elem);
5571     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5572       // Store array size.
5573       ++Idx;
5574       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5575       llvm::Value *Size = CGF.Builder.CreateIntCast(
5576           CGF.getVLASize(
5577                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5578               .NumElts,
5579           CGF.SizeTy, /*isSigned=*/false);
5580       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5581                               Elem);
5582     }
5583   }
5584 
5585   // 2. Emit reduce_func().
5586   llvm::Function *ReductionFn = emitReductionFunction(
5587       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5588       LHSExprs, RHSExprs, ReductionOps);
5589 
5590   // 3. Create static kmp_critical_name lock = { 0 };
5591   std::string Name = getName({"reduction"});
5592   llvm::Value *Lock = getCriticalRegionLock(Name);
5593 
5594   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5595   // RedList, reduce_func, &<lock>);
5596   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5597   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5598   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5599   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5600       ReductionList.getPointer(), CGF.VoidPtrTy);
5601   llvm::Value *Args[] = {
5602       IdentTLoc,                             // ident_t *<loc>
5603       ThreadId,                              // i32 <gtid>
5604       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5605       ReductionArrayTySize,                  // size_type sizeof(RedList)
5606       RL,                                    // void *RedList
5607       ReductionFn, // void (*) (void *, void *) <reduce_func>
5608       Lock         // kmp_critical_name *&<lock>
5609   };
5610   llvm::Value *Res = CGF.EmitRuntimeCall(
5611       OMPBuilder.getOrCreateRuntimeFunction(
5612           CGM.getModule(),
5613           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5614       Args);
5615 
5616   // 5. Build switch(res)
5617   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5618   llvm::SwitchInst *SwInst =
5619       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5620 
5621   // 6. Build case 1:
5622   //  ...
5623   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5624   //  ...
5625   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5626   // break;
5627   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5628   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5629   CGF.EmitBlock(Case1BB);
5630 
5631   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5632   llvm::Value *EndArgs[] = {
5633       IdentTLoc, // ident_t *<loc>
5634       ThreadId,  // i32 <gtid>
5635       Lock       // kmp_critical_name *&<lock>
5636   };
5637   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5638                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5639     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5640     auto IPriv = Privates.begin();
5641     auto ILHS = LHSExprs.begin();
5642     auto IRHS = RHSExprs.begin();
5643     for (const Expr *E : ReductionOps) {
5644       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5645                                      cast<DeclRefExpr>(*IRHS));
5646       ++IPriv;
5647       ++ILHS;
5648       ++IRHS;
5649     }
5650   };
5651   RegionCodeGenTy RCG(CodeGen);
5652   CommonActionTy Action(
5653       nullptr, llvm::None,
5654       OMPBuilder.getOrCreateRuntimeFunction(
5655           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5656                                       : OMPRTL___kmpc_end_reduce),
5657       EndArgs);
5658   RCG.setAction(Action);
5659   RCG(CGF);
5660 
5661   CGF.EmitBranch(DefaultBB);
5662 
5663   // 7. Build case 2:
5664   //  ...
5665   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5666   //  ...
5667   // break;
5668   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5669   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5670   CGF.EmitBlock(Case2BB);
5671 
5672   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5673                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5674     auto ILHS = LHSExprs.begin();
5675     auto IRHS = RHSExprs.begin();
5676     auto IPriv = Privates.begin();
5677     for (const Expr *E : ReductionOps) {
5678       const Expr *XExpr = nullptr;
5679       const Expr *EExpr = nullptr;
5680       const Expr *UpExpr = nullptr;
5681       BinaryOperatorKind BO = BO_Comma;
5682       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5683         if (BO->getOpcode() == BO_Assign) {
5684           XExpr = BO->getLHS();
5685           UpExpr = BO->getRHS();
5686         }
5687       }
5688       // Try to emit update expression as a simple atomic.
5689       const Expr *RHSExpr = UpExpr;
5690       if (RHSExpr) {
5691         // Analyze RHS part of the whole expression.
5692         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5693                 RHSExpr->IgnoreParenImpCasts())) {
5694           // If this is a conditional operator, analyze its condition for
5695           // min/max reduction operator.
5696           RHSExpr = ACO->getCond();
5697         }
5698         if (const auto *BORHS =
5699                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5700           EExpr = BORHS->getRHS();
5701           BO = BORHS->getOpcode();
5702         }
5703       }
5704       if (XExpr) {
5705         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5706         auto &&AtomicRedGen = [BO, VD,
5707                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5708                                     const Expr *EExpr, const Expr *UpExpr) {
5709           LValue X = CGF.EmitLValue(XExpr);
5710           RValue E;
5711           if (EExpr)
5712             E = CGF.EmitAnyExpr(EExpr);
5713           CGF.EmitOMPAtomicSimpleUpdateExpr(
5714               X, E, BO, /*IsXLHSInRHSPart=*/true,
5715               llvm::AtomicOrdering::Monotonic, Loc,
5716               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5717                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5718                 PrivateScope.addPrivate(
5719                     VD, [&CGF, VD, XRValue, Loc]() {
5720                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5721                       CGF.emitOMPSimpleStore(
5722                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5723                           VD->getType().getNonReferenceType(), Loc);
5724                       return LHSTemp;
5725                     });
5726                 (void)PrivateScope.Privatize();
5727                 return CGF.EmitAnyExpr(UpExpr);
5728               });
5729         };
5730         if ((*IPriv)->getType()->isArrayType()) {
5731           // Emit atomic reduction for array section.
5732           const auto *RHSVar =
5733               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5734           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5735                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5736         } else {
5737           // Emit atomic reduction for array subscript or single variable.
5738           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5739         }
5740       } else {
5741         // Emit as a critical region.
5742         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5743                                            const Expr *, const Expr *) {
5744           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5745           std::string Name = RT.getName({"atomic_reduction"});
5746           RT.emitCriticalRegion(
5747               CGF, Name,
5748               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5749                 Action.Enter(CGF);
5750                 emitReductionCombiner(CGF, E);
5751               },
5752               Loc);
5753         };
5754         if ((*IPriv)->getType()->isArrayType()) {
5755           const auto *LHSVar =
5756               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5757           const auto *RHSVar =
5758               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5759           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5760                                     CritRedGen);
5761         } else {
5762           CritRedGen(CGF, nullptr, nullptr, nullptr);
5763         }
5764       }
5765       ++ILHS;
5766       ++IRHS;
5767       ++IPriv;
5768     }
5769   };
5770   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5771   if (!WithNowait) {
5772     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5773     llvm::Value *EndArgs[] = {
5774         IdentTLoc, // ident_t *<loc>
5775         ThreadId,  // i32 <gtid>
5776         Lock       // kmp_critical_name *&<lock>
5777     };
5778     CommonActionTy Action(nullptr, llvm::None,
5779                           OMPBuilder.getOrCreateRuntimeFunction(
5780                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5781                           EndArgs);
5782     AtomicRCG.setAction(Action);
5783     AtomicRCG(CGF);
5784   } else {
5785     AtomicRCG(CGF);
5786   }
5787 
5788   CGF.EmitBranch(DefaultBB);
5789   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5790 }
5791 
5792 /// Generates unique name for artificial threadprivate variables.
5793 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5794 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5795                                       const Expr *Ref) {
5796   SmallString<256> Buffer;
5797   llvm::raw_svector_ostream Out(Buffer);
5798   const clang::DeclRefExpr *DE;
5799   const VarDecl *D = ::getBaseDecl(Ref, DE);
5800   if (!D)
5801     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5802   D = D->getCanonicalDecl();
5803   std::string Name = CGM.getOpenMPRuntime().getName(
5804       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5805   Out << Prefix << Name << "_"
5806       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5807   return std::string(Out.str());
5808 }
5809 
5810 /// Emits reduction initializer function:
5811 /// \code
5812 /// void @.red_init(void* %arg, void* %orig) {
5813 /// %0 = bitcast void* %arg to <type>*
5814 /// store <type> <init>, <type>* %0
5815 /// ret void
5816 /// }
5817 /// \endcode
5818 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5819                                            SourceLocation Loc,
5820                                            ReductionCodeGen &RCG, unsigned N) {
5821   ASTContext &C = CGM.getContext();
5822   QualType VoidPtrTy = C.VoidPtrTy;
5823   VoidPtrTy.addRestrict();
5824   FunctionArgList Args;
5825   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5826                           ImplicitParamDecl::Other);
5827   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5828                               ImplicitParamDecl::Other);
5829   Args.emplace_back(&Param);
5830   Args.emplace_back(&ParamOrig);
5831   const auto &FnInfo =
5832       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5833   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5834   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5835   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5836                                     Name, &CGM.getModule());
5837   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5838   Fn->setDoesNotRecurse();
5839   CodeGenFunction CGF(CGM);
5840   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5841   Address PrivateAddr = CGF.EmitLoadOfPointer(
5842       CGF.GetAddrOfLocalVar(&Param),
5843       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5844   llvm::Value *Size = nullptr;
5845   // If the size of the reduction item is non-constant, load it from global
5846   // threadprivate variable.
5847   if (RCG.getSizes(N).second) {
5848     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5849         CGF, CGM.getContext().getSizeType(),
5850         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5851     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5852                                 CGM.getContext().getSizeType(), Loc);
5853   }
5854   RCG.emitAggregateType(CGF, N, Size);
5855   LValue OrigLVal;
5856   // If initializer uses initializer from declare reduction construct, emit a
5857   // pointer to the address of the original reduction item (reuired by reduction
5858   // initializer)
5859   if (RCG.usesReductionInitializer(N)) {
5860     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5861     SharedAddr = CGF.EmitLoadOfPointer(
5862         SharedAddr,
5863         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5864     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5865   } else {
5866     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5867         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5868         CGM.getContext().VoidPtrTy);
5869   }
5870   // Emit the initializer:
5871   // %0 = bitcast void* %arg to <type>*
5872   // store <type> <init>, <type>* %0
5873   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5874                          [](CodeGenFunction &) { return false; });
5875   CGF.FinishFunction();
5876   return Fn;
5877 }
5878 
5879 /// Emits reduction combiner function:
5880 /// \code
5881 /// void @.red_comb(void* %arg0, void* %arg1) {
5882 /// %lhs = bitcast void* %arg0 to <type>*
5883 /// %rhs = bitcast void* %arg1 to <type>*
5884 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5885 /// store <type> %2, <type>* %lhs
5886 /// ret void
5887 /// }
5888 /// \endcode
5889 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5890                                            SourceLocation Loc,
5891                                            ReductionCodeGen &RCG, unsigned N,
5892                                            const Expr *ReductionOp,
5893                                            const Expr *LHS, const Expr *RHS,
5894                                            const Expr *PrivateRef) {
5895   ASTContext &C = CGM.getContext();
5896   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5897   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5898   FunctionArgList Args;
5899   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5900                                C.VoidPtrTy, ImplicitParamDecl::Other);
5901   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5902                             ImplicitParamDecl::Other);
5903   Args.emplace_back(&ParamInOut);
5904   Args.emplace_back(&ParamIn);
5905   const auto &FnInfo =
5906       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5907   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5908   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5909   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5910                                     Name, &CGM.getModule());
5911   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5912   Fn->setDoesNotRecurse();
5913   CodeGenFunction CGF(CGM);
5914   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5915   llvm::Value *Size = nullptr;
5916   // If the size of the reduction item is non-constant, load it from global
5917   // threadprivate variable.
5918   if (RCG.getSizes(N).second) {
5919     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5920         CGF, CGM.getContext().getSizeType(),
5921         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5922     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5923                                 CGM.getContext().getSizeType(), Loc);
5924   }
5925   RCG.emitAggregateType(CGF, N, Size);
5926   // Remap lhs and rhs variables to the addresses of the function arguments.
5927   // %lhs = bitcast void* %arg0 to <type>*
5928   // %rhs = bitcast void* %arg1 to <type>*
5929   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5930   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5931     // Pull out the pointer to the variable.
5932     Address PtrAddr = CGF.EmitLoadOfPointer(
5933         CGF.GetAddrOfLocalVar(&ParamInOut),
5934         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5935     return CGF.Builder.CreateElementBitCast(
5936         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5937   });
5938   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5939     // Pull out the pointer to the variable.
5940     Address PtrAddr = CGF.EmitLoadOfPointer(
5941         CGF.GetAddrOfLocalVar(&ParamIn),
5942         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5943     return CGF.Builder.CreateElementBitCast(
5944         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5945   });
5946   PrivateScope.Privatize();
5947   // Emit the combiner body:
5948   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5949   // store <type> %2, <type>* %lhs
5950   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5951       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5952       cast<DeclRefExpr>(RHS));
5953   CGF.FinishFunction();
5954   return Fn;
5955 }
5956 
5957 /// Emits reduction finalizer function:
5958 /// \code
5959 /// void @.red_fini(void* %arg) {
5960 /// %0 = bitcast void* %arg to <type>*
5961 /// <destroy>(<type>* %0)
5962 /// ret void
5963 /// }
5964 /// \endcode
5965 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5966                                            SourceLocation Loc,
5967                                            ReductionCodeGen &RCG, unsigned N) {
5968   if (!RCG.needCleanups(N))
5969     return nullptr;
5970   ASTContext &C = CGM.getContext();
5971   FunctionArgList Args;
5972   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5973                           ImplicitParamDecl::Other);
5974   Args.emplace_back(&Param);
5975   const auto &FnInfo =
5976       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5977   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5978   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5979   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5980                                     Name, &CGM.getModule());
5981   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5982   Fn->setDoesNotRecurse();
5983   CodeGenFunction CGF(CGM);
5984   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5985   Address PrivateAddr = CGF.EmitLoadOfPointer(
5986       CGF.GetAddrOfLocalVar(&Param),
5987       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5988   llvm::Value *Size = nullptr;
5989   // If the size of the reduction item is non-constant, load it from global
5990   // threadprivate variable.
5991   if (RCG.getSizes(N).second) {
5992     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5993         CGF, CGM.getContext().getSizeType(),
5994         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5995     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5996                                 CGM.getContext().getSizeType(), Loc);
5997   }
5998   RCG.emitAggregateType(CGF, N, Size);
5999   // Emit the finalizer body:
6000   // <destroy>(<type>* %0)
6001   RCG.emitCleanups(CGF, N, PrivateAddr);
6002   CGF.FinishFunction(Loc);
6003   return Fn;
6004 }
6005 
/// Emits the set-up code for task reductions: a local array with one
/// kmp_taskred_input_t descriptor per entry of Data.ReductionVars is filled in
/// and handed to the OpenMP runtime.
///
/// \param LHSExprs, RHSExprs per-item expressions forwarded to the combiner
/// function generator.
/// \returns the result of the __kmpc_taskred_modifier_init call when
/// Data.IsReductionWithTaskMod is set, otherwise the result of the
/// __kmpc_taskred_init call; nullptr when there is no insert point or there
/// are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields below are added in exactly the order listed above.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null SizeVal is what selects delayed creation for this item.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size of the item in chars;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null finalizer function is stored as a null pointer.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6134 
6135 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6136                                             SourceLocation Loc,
6137                                             bool IsWorksharingReduction) {
6138   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6139   // is_ws, int num, void *data);
6140   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6141   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6142                                                 CGM.IntTy, /*isSigned=*/true);
6143   llvm::Value *Args[] = {IdentTLoc, GTid,
6144                          llvm::ConstantInt::get(CGM.IntTy,
6145                                                 IsWorksharingReduction ? 1 : 0,
6146                                                 /*isSigned=*/true)};
6147   (void)CGF.EmitRuntimeCall(
6148       OMPBuilder.getOrCreateRuntimeFunction(
6149           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6150       Args);
6151 }
6152 
6153 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6154                                               SourceLocation Loc,
6155                                               ReductionCodeGen &RCG,
6156                                               unsigned N) {
6157   auto Sizes = RCG.getSizes(N);
6158   // Emit threadprivate global variable if the type is non-constant
6159   // (Sizes.second = nullptr).
6160   if (Sizes.second) {
6161     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6162                                                      /*isSigned=*/false);
6163     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6164         CGF, CGM.getContext().getSizeType(),
6165         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6166     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6167   }
6168 }
6169 
6170 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6171                                               SourceLocation Loc,
6172                                               llvm::Value *ReductionsPtr,
6173                                               LValue SharedLVal) {
6174   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6175   // *d);
6176   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6177                                                    CGM.IntTy,
6178                                                    /*isSigned=*/true),
6179                          ReductionsPtr,
6180                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6181                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6182   return Address(
6183       CGF.EmitRuntimeCall(
6184           OMPBuilder.getOrCreateRuntimeFunction(
6185               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6186           Args),
6187       SharedLVal.getAlignment());
6188 }
6189 
6190 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6191                                        SourceLocation Loc) {
6192   if (!CGF.HaveInsertPoint())
6193     return;
6194 
6195   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6196     OMPBuilder.createTaskwait(CGF.Builder);
6197   } else {
6198     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6199     // global_tid);
6200     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6201     // Ignore return result until untied tasks are supported.
6202     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6203                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6204                         Args);
6205   }
6206 
6207   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6208     Region->emitUntiedSwitch(CGF);
6209 }
6210 
6211 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6212                                            OpenMPDirectiveKind InnerKind,
6213                                            const RegionCodeGenTy &CodeGen,
6214                                            bool HasCancel) {
6215   if (!CGF.HaveInsertPoint())
6216     return;
6217   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6218   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6219 }
6220 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The numeric
/// values are emitted into the generated code as 32-bit constants, so they
/// are spelled out explicitly.
enum RTCancelKind {
  /// Initial value used by getCancellationKind before a real kind is chosen.
  /// NOTE(review): presumably means "no cancellation requested" - confirm
  /// against the runtime's definition.
  CancelNoreq = 0,
  /// Used for OMPD_parallel.
  CancelParallel = 1,
  /// Used for OMPD_for.
  CancelLoop = 2,
  /// Used for OMPD_sections.
  CancelSections = 3,
  /// Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6230 
6231 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6232   RTCancelKind CancelKind = CancelNoreq;
6233   if (CancelRegion == OMPD_parallel)
6234     CancelKind = CancelParallel;
6235   else if (CancelRegion == OMPD_for)
6236     CancelKind = CancelLoop;
6237   else if (CancelRegion == OMPD_sections)
6238     CancelKind = CancelSections;
6239   else {
6240     assert(CancelRegion == OMPD_taskgroup);
6241     CancelKind = CancelTaskgroup;
6242   }
6243   return CancelKind;
6244 }
6245 
6246 void CGOpenMPRuntime::emitCancellationPointCall(
6247     CodeGenFunction &CGF, SourceLocation Loc,
6248     OpenMPDirectiveKind CancelRegion) {
6249   if (!CGF.HaveInsertPoint())
6250     return;
6251   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6252   // global_tid, kmp_int32 cncl_kind);
6253   if (auto *OMPRegionInfo =
6254           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6255     // For 'cancellation point taskgroup', the task region info may not have a
6256     // cancel. This may instead happen in another adjacent task.
6257     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6258       llvm::Value *Args[] = {
6259           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6260           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6261       // Ignore return result until untied tasks are supported.
6262       llvm::Value *Result = CGF.EmitRuntimeCall(
6263           OMPBuilder.getOrCreateRuntimeFunction(
6264               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6265           Args);
6266       // if (__kmpc_cancellationpoint()) {
6267       //   exit from construct;
6268       // }
6269       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6270       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6271       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6272       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6273       CGF.EmitBlock(ExitBB);
6274       // exit from construct;
6275       CodeGenFunction::JumpDest CancelDest =
6276           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6277       CGF.EmitBranchThroughCleanup(CancelDest);
6278       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6279     }
6280   }
6281 }
6282 
6283 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6284                                      const Expr *IfCond,
6285                                      OpenMPDirectiveKind CancelRegion) {
6286   if (!CGF.HaveInsertPoint())
6287     return;
6288   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6289   // kmp_int32 cncl_kind);
6290   auto &M = CGM.getModule();
6291   if (auto *OMPRegionInfo =
6292           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6293     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6294                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6295       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6296       llvm::Value *Args[] = {
6297           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6298           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6299       // Ignore return result until untied tasks are supported.
6300       llvm::Value *Result = CGF.EmitRuntimeCall(
6301           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6302       // if (__kmpc_cancel()) {
6303       //   exit from construct;
6304       // }
6305       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6306       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6307       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6308       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6309       CGF.EmitBlock(ExitBB);
6310       // exit from construct;
6311       CodeGenFunction::JumpDest CancelDest =
6312           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6313       CGF.EmitBranchThroughCleanup(CancelDest);
6314       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6315     };
6316     if (IfCond) {
6317       emitIfClause(CGF, IfCond, ThenGen,
6318                    [](CodeGenFunction &, PrePostActionTy &) {});
6319     } else {
6320       RegionCodeGenTy ThenRCG(ThenGen);
6321       ThenRCG(CGF);
6322     }
6323   }
6324 }
6325 
6326 namespace {
6327 /// Cleanup action for uses_allocators support.
6328 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6329   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6330 
6331 public:
6332   OMPUsesAllocatorsActionTy(
6333       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6334       : Allocators(Allocators) {}
6335   void Enter(CodeGenFunction &CGF) override {
6336     if (!CGF.HaveInsertPoint())
6337       return;
6338     for (const auto &AllocatorData : Allocators) {
6339       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6340           CGF, AllocatorData.first, AllocatorData.second);
6341     }
6342   }
6343   void Exit(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6348                                                         AllocatorData.first);
6349     }
6350   }
6351 };
6352 } // namespace
6353 
6354 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6355     const OMPExecutableDirective &D, StringRef ParentName,
6356     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6357     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6358   assert(!ParentName.empty() && "Invalid target region parent name!");
6359   HasEmittedTargetRegion = true;
6360   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6361   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6362     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6363       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6364       if (!D.AllocatorTraits)
6365         continue;
6366       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6367     }
6368   }
6369   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6370   CodeGen.setAction(UsesAllocatorAction);
6371   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6372                                    IsOffloadEntry, CodeGen);
6373 }
6374 
6375 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6376                                              const Expr *Allocator,
6377                                              const Expr *AllocatorTraits) {
6378   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6379   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6380   // Use default memspace handle.
6381   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6382   llvm::Value *NumTraits = llvm::ConstantInt::get(
6383       CGF.IntTy, cast<ConstantArrayType>(
6384                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6385                      ->getSize()
6386                      .getLimitedValue());
6387   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6388   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6389       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6390   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6391                                            AllocatorTraitsLVal.getBaseInfo(),
6392                                            AllocatorTraitsLVal.getTBAAInfo());
6393   llvm::Value *Traits =
6394       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6395 
6396   llvm::Value *AllocatorVal =
6397       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6398                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6399                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6400   // Store to allocator.
6401   CGF.EmitVarDecl(*cast<VarDecl>(
6402       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6403   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6404   AllocatorVal =
6405       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6406                                Allocator->getType(), Allocator->getExprLoc());
6407   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6408 }
6409 
6410 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6411                                              const Expr *Allocator) {
6412   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6413   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6414   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6415   llvm::Value *AllocatorVal =
6416       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6417   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6418                                           CGF.getContext().VoidPtrTy,
6419                                           Allocator->getExprLoc());
6420   (void)CGF.EmitRuntimeCall(
6421       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6422                                             OMPRTL___kmpc_destroy_allocator),
6423       {ThreadId, AllocatorVal});
6424 }
6425 
6426 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6427     const OMPExecutableDirective &D, StringRef ParentName,
6428     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6429     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6430   // Create a unique name for the entry function using the source location
6431   // information of the current target region. The name will be something like:
6432   //
6433   // __omp_offloading_DD_FFFF_PP_lBB
6434   //
6435   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6436   // mangled name of the function that encloses the target region and BB is the
6437   // line number of the target region.
6438 
6439   unsigned DeviceID;
6440   unsigned FileID;
6441   unsigned Line;
6442   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6443                            Line);
6444   SmallString<64> EntryFnName;
6445   {
6446     llvm::raw_svector_ostream OS(EntryFnName);
6447     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6448        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6449   }
6450 
6451   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6452 
6453   CodeGenFunction CGF(CGM, true);
6454   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6455   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6456 
6457   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6458 
6459   // If this target outline function is not an offload entry, we don't need to
6460   // register it.
6461   if (!IsOffloadEntry)
6462     return;
6463 
6464   // The target region ID is used by the runtime library to identify the current
6465   // target region, so it only has to be unique and not necessarily point to
6466   // anything. It could be the pointer to the outlined function that implements
6467   // the target region, but we aren't using that so that the compiler doesn't
6468   // need to keep that, and could therefore inline the host function if proven
6469   // worthwhile during optimization. In the other hand, if emitting code for the
6470   // device, the ID has to be the function address so that it can retrieved from
6471   // the offloading entry and launched by the runtime library. We also mark the
6472   // outlined function to have external linkage in case we are emitting code for
6473   // the device, because these functions will be entry points to the device.
6474 
6475   if (CGM.getLangOpts().OpenMPIsDevice) {
6476     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6477     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6478     OutlinedFn->setDSOLocal(false);
6479     if (CGM.getTriple().isAMDGCN())
6480       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6481   } else {
6482     std::string Name = getName({EntryFnName, "region_id"});
6483     OutlinedFnID = new llvm::GlobalVariable(
6484         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6485         llvm::GlobalValue::WeakAnyLinkage,
6486         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6487   }
6488 
6489   // Register the information for the entry associated with this target region.
6490   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6491       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6492       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6493 }
6494 
6495 /// Checks if the expression is constant or does not have non-trivial function
6496 /// calls.
6497 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6498   // We can skip constant expressions.
6499   // We can skip expressions with trivial calls or simple expressions.
6500   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6501           !E->hasNonTrivialCall(Ctx)) &&
6502          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6503 }
6504 
6505 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6506                                                     const Stmt *Body) {
6507   const Stmt *Child = Body->IgnoreContainers();
6508   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6509     Child = nullptr;
6510     for (const Stmt *S : C->body()) {
6511       if (const auto *E = dyn_cast<Expr>(S)) {
6512         if (isTrivial(Ctx, E))
6513           continue;
6514       }
6515       // Some of the statements can be ignored.
6516       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6517           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6518         continue;
6519       // Analyze declarations.
6520       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6521         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6522               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6523                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6524                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6525                   isa<UsingDirectiveDecl>(D) ||
6526                   isa<OMPDeclareReductionDecl>(D) ||
6527                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6528                 return true;
6529               const auto *VD = dyn_cast<VarDecl>(D);
6530               if (!VD)
6531                 return false;
6532               return VD->isConstexpr() ||
6533                      ((VD->getType().isTrivialType(Ctx) ||
6534                        VD->getType()->isReferenceType()) &&
6535                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6536             }))
6537           continue;
6538       }
6539       // Found multiple children - cannot get the one child only.
6540       if (Child)
6541         return nullptr;
6542       Child = S;
6543     }
6544     if (Child)
6545       Child = Child->IgnoreContainers();
6546   }
6547   return Child;
6548 }
6549 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Return the constant 1 (a single team) for directives such as
/// 'target parallel' that have no associated teams construct.
///
/// Return the constant 0 when a teams construct is present but carries no
/// num_teams clause.
///
/// Otherwise, i.e. for a plain 'target' whose body has no single nested
/// executable directive, return nullptr.
///
/// The result is always a 32-bit integer value when it is non-null. Must only
/// be called on the host and only for target execution directives; any other
/// directive kind is unreachable.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single directive nested in the captured body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression with the captured
          // statement info of the enclosing target region installed.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested executable directive.
      return Bld.getInt32(0);
    }
    // No single nested executable directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined 'target teams ...': the num_teams clause, if any, is on D.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      // Cleanups created while evaluating the expression run when this scope
      // ends.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  // Target directives without a teams construct: a single team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // Everything below is not a target execution directive; these cases fall
  // out of the switch into the llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6682 
6683 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6684                                   llvm::Value *DefaultThreadLimitVal) {
6685   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6686       CGF.getContext(), CS->getCapturedStmt());
6687   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6688     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6689       llvm::Value *NumThreads = nullptr;
6690       llvm::Value *CondVal = nullptr;
6691       // Handle if clause. If if clause present, the number of threads is
6692       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6693       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6694         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6695         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6696         const OMPIfClause *IfClause = nullptr;
6697         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6698           if (C->getNameModifier() == OMPD_unknown ||
6699               C->getNameModifier() == OMPD_parallel) {
6700             IfClause = C;
6701             break;
6702           }
6703         }
6704         if (IfClause) {
6705           const Expr *Cond = IfClause->getCondition();
6706           bool Result;
6707           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6708             if (!Result)
6709               return CGF.Builder.getInt32(1);
6710           } else {
6711             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6712             if (const auto *PreInit =
6713                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6714               for (const auto *I : PreInit->decls()) {
6715                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6716                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6717                 } else {
6718                   CodeGenFunction::AutoVarEmission Emission =
6719                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6720                   CGF.EmitAutoVarCleanups(Emission);
6721                 }
6722               }
6723             }
6724             CondVal = CGF.EvaluateExprAsBool(Cond);
6725           }
6726         }
6727       }
6728       // Check the value of num_threads clause iff if clause was not specified
6729       // or is not evaluated to false.
6730       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6731         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6732         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6733         const auto *NumThreadsClause =
6734             Dir->getSingleClause<OMPNumThreadsClause>();
6735         CodeGenFunction::LexicalScope Scope(
6736             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6737         if (const auto *PreInit =
6738                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6739           for (const auto *I : PreInit->decls()) {
6740             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6741               CGF.EmitVarDecl(cast<VarDecl>(*I));
6742             } else {
6743               CodeGenFunction::AutoVarEmission Emission =
6744                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6745               CGF.EmitAutoVarCleanups(Emission);
6746             }
6747           }
6748         }
6749         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6750         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6751                                                /*isSigned=*/false);
6752         if (DefaultThreadLimitVal)
6753           NumThreads = CGF.Builder.CreateSelect(
6754               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6755               DefaultThreadLimitVal, NumThreads);
6756       } else {
6757         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6758                                            : CGF.Builder.getInt32(0);
6759       }
6760       // Process condition of the if clause.
6761       if (CondVal) {
6762         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6763                                               CGF.Builder.getInt32(1));
6764       }
6765       return NumThreads;
6766     }
6767     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6768       return CGF.Builder.getInt32(1);
6769     return DefaultThreadLimitVal;
6770   }
6771   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6772                                : CGF.Builder.getInt32(0);
6773 }
6774 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Must only be called when compiling for the host and with a target
/// execution directive; both conditions are asserted. Any directive kind that
/// is not handled by one of the returning cases below reaches
/// llvm_unreachable.
///
/// \param CGF Function in which the clause expressions are emitted.
/// \param D   The target execution directive to inspect.
/// \returns a 32-bit integer value. The constant 0 is used when no clause
/// determines the value and the constant 1 for simd-only forms or a statically
/// false 'if' clause. For 'target teams distribute' the result of
/// getNumThreads is returned as-is, so it can be null when that helper
/// returns its (null) default.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': everything of interest is in a nested directive.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so a non-null result means the
    // nested statement itself determined the value.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause on the nested directive, emitting its
      // pre-init declarations first.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: step one
      // level further down. Dir may become null here.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive: look inside it, using the thread
      // limit found so far as the default.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit, if present, is a clause of the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // Only a plain nested 'distribute' is looked through here.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The helper's result is returned unchanged, including a null result.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined forms containing 'parallel': all relevant clauses are on D.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an unmodified 'if' or one with the 'parallel' modifier applies.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present take the smaller value (unsigned compare);
      // otherwise num_threads alone decides.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A run-time 'if' condition selects between the computed value and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the kinds below returns a value; they all fall out of the switch
  // to the llvm_unreachable at the end of the function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6999 
7000 namespace {
7001 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7002 
7003 // Utility to handle information from clauses associated with a given
7004 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7005 // It provides a convenient interface to obtain the information and generate
7006 // code for that information.
7007 class MappableExprsHandler {
7008 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are combined with bitwise operators (the
  /// enum is marked as a bitmask enum below).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is intentionally unused; it is reserved for compatibility
    // with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7056 
7057   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7058   static unsigned getFlagMemberOffset() {
7059     unsigned Offset = 0;
7060     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7061          Remain = Remain >> 1)
7062       Offset++;
7063     return Offset;
7064   }
7065 
7066   /// Class that holds debugging information for a data mapping to be passed to
7067   /// the runtime library.
7068   class MappingExprInfo {
7069     /// The variable declaration used for the data mapping.
7070     const ValueDecl *MapDecl = nullptr;
7071     /// The original expression used in the map clause, or null if there is
7072     /// none.
7073     const Expr *MapExpr = nullptr;
7074 
7075   public:
7076     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7077         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7078 
7079     const ValueDecl *getMapDecl() const { return MapDecl; }
7080     const Expr *getMapExpr() const { return MapExpr; }
7081   };
7082 
7083   /// Class that associates information with a base pointer to be passed to the
7084   /// runtime library.
7085   class BasePointerInfo {
7086     /// The base pointer.
7087     llvm::Value *Ptr = nullptr;
7088     /// The base declaration that refers to this device pointer, or null if
7089     /// there is none.
7090     const ValueDecl *DevPtrDecl = nullptr;
7091 
7092   public:
7093     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7094         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7095     llvm::Value *operator*() const { return Ptr; }
7096     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7097     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7098   };
7099 
  // Container aliases for the arrays collected in MapCombinedInfoTy.
  /// Debug information entries (MappingExprInfo).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Base pointer entries (BasePointerInfo).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Plain IR values; used for the pointer and size arrays.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Mapping type flag words.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// User-defined mapper declarations.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Integer dimension values for non-contiguous information.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// Arrays of IR value arrays; used for the non-contiguous offsets, counts
  /// and strides.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7107 
7108   /// This structure contains combined information generated for mappable
7109   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7110   /// mappers, and non-contiguous information.
7111   struct MapCombinedInfoTy {
7112     struct StructNonContiguousInfo {
7113       bool IsNonContiguous = false;
7114       MapDimArrayTy Dims;
7115       MapNonContiguousArrayTy Offsets;
7116       MapNonContiguousArrayTy Counts;
7117       MapNonContiguousArrayTy Strides;
7118     };
7119     MapExprsArrayTy Exprs;
7120     MapBaseValuesArrayTy BasePointers;
7121     MapValuesArrayTy Pointers;
7122     MapValuesArrayTy Sizes;
7123     MapFlagsArrayTy Types;
7124     MapMappersArrayTy Mappers;
7125     StructNonContiguousInfo NonContigInfo;
7126 
7127     /// Append arrays in \a CurInfo.
7128     void append(MapCombinedInfoTy &CurInfo) {
7129       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7130       BasePointers.append(CurInfo.BasePointers.begin(),
7131                           CurInfo.BasePointers.end());
7132       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7133       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7134       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7135       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7136       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7137                                  CurInfo.NonContigInfo.Dims.end());
7138       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7139                                     CurInfo.NonContigInfo.Offsets.end());
7140       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7141                                    CurInfo.NonContigInfo.Counts.end());
7142       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7143                                     CurInfo.NonContigInfo.Strides.end());
7144     }
7145   };
7146 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element: field index and its address. Starts out as
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: field index and its address. Starts out as
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address; invalid until set.
    // NOTE(review): presumably the address of the enclosing struct - confirm
    // against the code that fills this in.
    Address Base = Address::invalid();
    /// Flag recording an array section; false by default.
    bool IsArraySection = false;
  };
7159 
7160 private:
  /// Information collected for one mappable expression component list: the
  /// components themselves plus the map type, modifiers and flags that apply
  /// to them.
  struct MapInfo {
    /// The expression component list being described.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown by default.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers that apply to the entry.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers that apply to the entry.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the entry is marked implicit.
    bool IsImplicit = false;
    /// User-defined mapper associated with the entry, or null.
    const ValueDecl *Mapper = nullptr;
    /// Expression associated with the entry, or null.
    // NOTE(review): presumably the variable reference as written in the
    // clause - confirm against the callers that construct MapInfo.
    const Expr *VarRef = nullptr;
    /// Flag named for device-address handling; false by default.
    // NOTE(review): presumably set for use_device_addr entries - confirm.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Member-wise constructor; the last three arguments are optional.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7187 
7188   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7189   /// member and there is no map information about it, then emission of that
7190   /// entry is deferred until the whole struct has been processed.
7191   struct DeferredDevicePtrEntryTy {
7192     const Expr *IE = nullptr;
7193     const ValueDecl *VD = nullptr;
7194     bool ForDeviceAddr = false;
7195 
7196     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7197                              bool ForDeviceAddr)
7198         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7199   };
7200 
  /// The directive from which the mappable clauses were extracted. It is
  /// either an executable directive or a user-defined mapper declaration.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The mapped bool is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7221 
  /// Emit a value holding the size of what the mappable expression \p E
  /// refers to, as computed by CGF.getTypeSize.
  ///
  /// - Array shaping expression: the size of the base's pointee type
  ///   multiplied by every dimension.
  /// - Array section: depends on which of lower bound, colon and length are
  ///   present (see the comments in the body).
  /// - Anything else: the size of the expression's canonical type, with a
  ///   reference type replaced by the type it refers to.
  ///
  /// May emit instructions into the current function for the dimension,
  /// length and lower-bound expressions.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      // Multiply by each dimension, converted to the size type first.
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size: pointee type for a pointer base, element type for an
      // array base.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = size of the base type - lb * element size, or zero when the
      // base size is not greater than lb * element size.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7296 
7297   /// Return the corresponding bits for a given map clause modifier. Add
7298   /// a flag marking the map as a pointer if requested. Add a flag marking the
7299   /// map as the first one of a series of maps that relate to the same map
7300   /// expression.
7301   OpenMPOffloadMappingFlags getMapTypeBits(
7302       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7303       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7304       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7305     OpenMPOffloadMappingFlags Bits =
7306         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7307     switch (MapType) {
7308     case OMPC_MAP_alloc:
7309     case OMPC_MAP_release:
7310       // alloc and release is the default behavior in the runtime library,  i.e.
7311       // if we don't pass any bits alloc/release that is what the runtime is
7312       // going to do. Therefore, we don't need to signal anything for these two
7313       // type modifiers.
7314       break;
7315     case OMPC_MAP_to:
7316       Bits |= OMP_MAP_TO;
7317       break;
7318     case OMPC_MAP_from:
7319       Bits |= OMP_MAP_FROM;
7320       break;
7321     case OMPC_MAP_tofrom:
7322       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7323       break;
7324     case OMPC_MAP_delete:
7325       Bits |= OMP_MAP_DELETE;
7326       break;
7327     case OMPC_MAP_unknown:
7328       llvm_unreachable("Unexpected map type!");
7329     }
7330     if (AddPtrFlag)
7331       Bits |= OMP_MAP_PTR_AND_OBJ;
7332     if (AddIsTargetParamFlag)
7333       Bits |= OMP_MAP_TARGET_PARAM;
7334     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7335         != MapModifiers.end())
7336       Bits |= OMP_MAP_ALWAYS;
7337     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7338         != MapModifiers.end())
7339       Bits |= OMP_MAP_CLOSE;
7340     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7341         != MapModifiers.end())
7342       Bits |= OMP_MAP_PRESENT;
7343     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7344         != MotionModifiers.end())
7345       Bits |= OMP_MAP_PRESENT;
7346     if (IsNonContiguous)
7347       Bits |= OMP_MAP_NON_CONTIG;
7348     return Bits;
7349   }
7350 
7351   /// Return true if the provided expression is a final array section. A
7352   /// final array section, is one whose length can't be proved to be one.
7353   bool isFinalArraySectionExpression(const Expr *E) const {
7354     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7355 
7356     // It is not an array section and therefore not a unity-size one.
7357     if (!OASE)
7358       return false;
7359 
7360     // An array section with no colon always refer to a single element.
7361     if (OASE->getColonLocFirst().isInvalid())
7362       return false;
7363 
7364     const Expr *Length = OASE->getLength();
7365 
7366     // If we don't have a length we have to check if the array has size 1
7367     // for this dimension. Also, we should always expect a length if the
7368     // base type is pointer.
7369     if (!Length) {
7370       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7371                              OASE->getBase()->IgnoreParenImpCasts())
7372                              .getCanonicalType();
7373       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7374         return ATy->getSize().getSExtValue() != 1;
7375       // If we don't have a constant dimension length, we have to consider
7376       // the current section as having any size, so it is not necessarily
7377       // unitary. If it happen to be unity size, that's user fault.
7378       return true;
7379     }
7380 
7381     // Check if the length evaluates to 1.
7382     Expr::EvalResult Result;
7383     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7384       return true; // Can have more that size 1.
7385 
7386     llvm::APSInt ConstLength = Result.Val.getInt();
7387     return ConstLength.getSExtValue() != 1;
7388   }
7389 
7390   /// Generate the base pointers, section pointers, sizes, map type bits, and
7391   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7392   /// map type, map or motion modifiers, and expression components.
7393   /// \a IsFirstComponent should be set to true if the provided set of
7394   /// components is the first associated with a capture.
7395   void generateInfoForComponentList(
7396       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7397       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7398       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7399       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7400       bool IsFirstComponentList, bool IsImplicit,
7401       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7402       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7403       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7404           OverlappedElements = llvm::None) const {
7405     // The following summarizes what has to be generated for each map and the
7406     // types below. The generated information is expressed in this order:
7407     // base pointer, section pointer, size, flags
7408     // (to add to the ones that come from the map type and modifier).
7409     //
7410     // double d;
7411     // int i[100];
7412     // float *p;
7413     //
7414     // struct S1 {
7415     //   int i;
7416     //   float f[50];
7417     // }
7418     // struct S2 {
7419     //   int i;
7420     //   float f[50];
7421     //   S1 s;
7422     //   double *p;
7423     //   struct S2 *ps;
7424     // }
7425     // S2 s;
7426     // S2 *ps;
7427     //
7428     // map(d)
7429     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7430     //
7431     // map(i)
7432     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7433     //
7434     // map(i[1:23])
7435     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7436     //
7437     // map(p)
7438     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7439     //
7440     // map(p[1:24])
7441     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7442     // in unified shared memory mode or for local pointers
7443     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7444     //
7445     // map(s)
7446     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7447     //
7448     // map(s.i)
7449     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7450     //
7451     // map(s.s.f)
7452     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7453     //
7454     // map(s.p)
7455     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7456     //
7457     // map(to: s.p[:22])
7458     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7459     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7460     // &(s.p), &(s.p[0]), 22*sizeof(double),
7461     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7462     // (*) alloc space for struct members, only this is a target parameter
7463     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7464     //      optimizes this entry out, same in the examples below)
7465     // (***) map the pointee (map: to)
7466     //
7467     // map(s.ps)
7468     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7469     //
7470     // map(from: s.ps->s.i)
7471     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7472     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7473     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7474     //
7475     // map(to: s.ps->ps)
7476     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7477     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7478     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7479     //
7480     // map(s.ps->ps->ps)
7481     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7482     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7483     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7484     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7485     //
7486     // map(to: s.ps->ps->s.f[:22])
7487     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7488     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7489     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7490     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7491     //
7492     // map(ps)
7493     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7494     //
7495     // map(ps->i)
7496     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7497     //
7498     // map(ps->s.f)
7499     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7500     //
7501     // map(from: ps->p)
7502     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7503     //
7504     // map(to: ps->p[:22])
7505     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7506     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7507     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7508     //
7509     // map(ps->ps)
7510     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7511     //
7512     // map(from: ps->ps->s.i)
7513     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7514     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7515     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7516     //
7517     // map(from: ps->ps->ps)
7518     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7519     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7520     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7521     //
7522     // map(ps->ps->ps->ps)
7523     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7524     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7525     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7526     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7527     //
7528     // map(to: ps->ps->ps->s.f[:22])
7529     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7530     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7531     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7532     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7533     //
7534     // map(to: s.f[:22]) map(from: s.p[:33])
7535     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7537     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7538     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7539     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7540     // (*) allocate contiguous space needed to fit all mapped members even if
7541     //     we allocate space for members not mapped (in this example,
7542     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7543     //     them as well because they fall between &s.f[0] and &s.p)
7544     //
7545     // map(from: s.f[:22]) map(to: ps->p[:33])
7546     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7547     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7548     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7549     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7550     // (*) the struct this entry pertains to is the 2nd element in the list of
7551     //     arguments, hence MEMBER_OF(2)
7552     //
7553     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7554     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7555     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7556     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7557     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7558     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7559     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7560     // (*) the struct this entry pertains to is the 4th element in the list
7561     //     of arguments, hence MEMBER_OF(4)
7562 
7563     // Track if the map information being generated is the first for a capture.
7564     bool IsCaptureFirstInfo = IsFirstComponentList;
7565     // When the variable is on a declare target link or in a to clause with
7566     // unified memory, a reference is needed to hold the host/device address
7567     // of the variable.
7568     bool RequiresReference = false;
7569 
7570     // Scan the components from the base to the complete expression.
7571     auto CI = Components.rbegin();
7572     auto CE = Components.rend();
7573     auto I = CI;
7574 
7575     // Track if the map information being generated is the first for a list of
7576     // components.
7577     bool IsExpressionFirstInfo = true;
7578     bool FirstPointerInComplexData = false;
7579     Address BP = Address::invalid();
7580     const Expr *AssocExpr = I->getAssociatedExpression();
7581     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7582     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7583     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7584 
7585     if (isa<MemberExpr>(AssocExpr)) {
7586       // The base is the 'this' pointer. The content of the pointer is going
7587       // to be the base of the field being mapped.
7588       BP = CGF.LoadCXXThisAddress();
7589     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7590                (OASE &&
7591                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7592       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7593     } else if (OAShE &&
7594                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7595       BP = Address(
7596           CGF.EmitScalarExpr(OAShE->getBase()),
7597           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7598     } else {
7599       // The base is the reference to the variable.
7600       // BP = &Var.
7601       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7602       if (const auto *VD =
7603               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7604         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7605                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7606           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7607               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7608                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7609             RequiresReference = true;
7610             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7611           }
7612         }
7613       }
7614 
7615       // If the variable is a pointer and is being dereferenced (i.e. is not
7616       // the last component), the base has to be the pointer itself, not its
7617       // reference. References are ignored for mapping purposes.
7618       QualType Ty =
7619           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7620       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7621         // No need to generate individual map information for the pointer, it
7622         // can be associated with the combined storage if shared memory mode is
7623         // active or the base declaration is not global variable.
7624         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7625         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7626             !VD || VD->hasLocalStorage())
7627           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7628         else
7629           FirstPointerInComplexData = true;
7630         ++I;
7631       }
7632     }
7633 
7634     // Track whether a component of the list should be marked as MEMBER_OF some
7635     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7636     // in a component list should be marked as MEMBER_OF, all subsequent entries
7637     // do not belong to the base struct. E.g.
7638     // struct S2 s;
7639     // s.ps->ps->ps->f[:]
7640     //   (1) (2) (3) (4)
7641     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7642     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7643     // is the pointee of ps(2) which is not member of struct s, so it should not
7644     // be marked as such (it is still PTR_AND_OBJ).
7645     // The variable is initialized to false so that PTR_AND_OBJ entries which
7646     // are not struct members are not considered (e.g. array of pointers to
7647     // data).
7648     bool ShouldBeMemberOf = false;
7649 
7650     // Variable keeping track of whether or not we have encountered a component
7651     // in the component list which is a member expression. Useful when we have a
7652     // pointer or a final array section, in which case it is the previous
7653     // component in the list which tells us whether we have a member expression.
7654     // E.g. X.f[:]
7655     // While processing the final array section "[:]" it is "f" which tells us
7656     // whether we are dealing with a member of a declared struct.
7657     const MemberExpr *EncounteredME = nullptr;
7658 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7661     uint64_t DimSize = 1;
7662 
7663     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7664 
7665     for (; I != CE; ++I) {
7666       // If the current component is member of a struct (parent struct) mark it.
7667       if (!EncounteredME) {
7668         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7669         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7670         // as MEMBER_OF the parent struct.
7671         if (EncounteredME) {
7672           ShouldBeMemberOf = true;
7673           // Do not emit as complex pointer if this is actually not array-like
7674           // expression.
7675           if (FirstPointerInComplexData) {
7676             QualType Ty = std::prev(I)
7677                               ->getAssociatedDeclaration()
7678                               ->getType()
7679                               .getNonReferenceType();
7680             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7681             FirstPointerInComplexData = false;
7682           }
7683         }
7684       }
7685 
7686       auto Next = std::next(I);
7687 
7688       // We need to generate the addresses and sizes if this is the last
7689       // component, if the component is a pointer or if it is an array section
7690       // whose length can't be proved to be one. If this is a pointer, it
7691       // becomes the base address for the following components.
7692 
7693       // A final array section, is one whose length can't be proved to be one.
7694       // If the map item is non-contiguous then we don't treat any array section
7695       // as final array section.
7696       bool IsFinalArraySection =
7697           !IsNonContiguous &&
7698           isFinalArraySectionExpression(I->getAssociatedExpression());
7699 
7700       // If we have a declaration for the mapping use that, otherwise use
7701       // the base declaration of the map clause.
7702       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7703                                      ? I->getAssociatedDeclaration()
7704                                      : BaseDecl;
7705 
7706       // Get information on whether the element is a pointer. Have to do a
7707       // special treatment for array sections given that they are built-in
7708       // types.
7709       const auto *OASE =
7710           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7711       const auto *OAShE =
7712           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7713       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7714       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7715       bool IsPointer =
7716           OAShE ||
7717           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7718                        .getCanonicalType()
7719                        ->isAnyPointerType()) ||
7720           I->getAssociatedExpression()->getType()->isAnyPointerType();
7721       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7722 
7723       if (OASE)
7724         ++DimSize;
7725 
7726       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7727         // If this is not the last component, we expect the pointer to be
7728         // associated with an array expression or member expression.
7729         assert((Next == CE ||
7730                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7731                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7732                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7733                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7734                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7735                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7736                "Unexpected expression");
7737 
7738         Address LB = Address::invalid();
7739         if (OAShE) {
7740           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7741                        CGF.getContext().getTypeAlignInChars(
7742                            OAShE->getBase()->getType()));
7743         } else {
7744           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7745                    .getAddress(CGF);
7746         }
7747 
7748         // If this component is a pointer inside the base struct then we don't
7749         // need to create any entry for it - it will be combined with the object
7750         // it is pointing to into a single PTR_AND_OBJ entry.
7751         bool IsMemberPointerOrAddr =
7752             (IsPointer || ForDeviceAddr) && EncounteredME &&
7753             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7754              EncounteredME);
7755         if (!OverlappedElements.empty()) {
7756           // Handle base element with the info for overlapped elements.
7757           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7758           assert(Next == CE &&
7759                  "Expected last element for the overlapped elements.");
7760           assert(!IsPointer &&
7761                  "Unexpected base element with the pointer type.");
7762           // Mark the whole struct as the struct that requires allocation on the
7763           // device.
7764           PartialStruct.LowestElem = {0, LB};
7765           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7766               I->getAssociatedExpression()->getType());
7767           Address HB = CGF.Builder.CreateConstGEP(
7768               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7769                                                               CGF.VoidPtrTy),
7770               TypeSize.getQuantity() - 1);
7771           PartialStruct.HighestElem = {
7772               std::numeric_limits<decltype(
7773                   PartialStruct.HighestElem.first)>::max(),
7774               HB};
7775           PartialStruct.Base = BP;
7776           // Emit data for non-overlapped data.
7777           OpenMPOffloadMappingFlags Flags =
7778               OMP_MAP_MEMBER_OF |
7779               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7780                              /*AddPtrFlag=*/false,
7781                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7782           LB = BP;
7783           llvm::Value *Size = nullptr;
7784           // Do bitcopy of all non-overlapped structure elements.
7785           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7786                    Component : OverlappedElements) {
7787             Address ComponentLB = Address::invalid();
7788             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7789                  Component) {
7790               if (MC.getAssociatedDeclaration()) {
7791                 ComponentLB =
7792                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7793                         .getAddress(CGF);
7794                 Size = CGF.Builder.CreatePtrDiff(
7795                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7796                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7797                 break;
7798               }
7799             }
7800             assert(Size && "Failed to determine structure size");
7801             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7802             CombinedInfo.BasePointers.push_back(BP.getPointer());
7803             CombinedInfo.Pointers.push_back(LB.getPointer());
7804             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7805                 Size, CGF.Int64Ty, /*isSigned=*/true));
7806             CombinedInfo.Types.push_back(Flags);
7807             CombinedInfo.Mappers.push_back(nullptr);
7808             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7809                                                                       : 1);
7810             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7811           }
7812           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7813           CombinedInfo.BasePointers.push_back(BP.getPointer());
7814           CombinedInfo.Pointers.push_back(LB.getPointer());
7815           Size = CGF.Builder.CreatePtrDiff(
7816               CGF.EmitCastToVoidPtr(
7817                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7818               CGF.EmitCastToVoidPtr(LB.getPointer()));
7819           CombinedInfo.Sizes.push_back(
7820               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7821           CombinedInfo.Types.push_back(Flags);
7822           CombinedInfo.Mappers.push_back(nullptr);
7823           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7824                                                                     : 1);
7825           break;
7826         }
7827         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7828         if (!IsMemberPointerOrAddr ||
7829             (Next == CE && MapType != OMPC_MAP_unknown)) {
7830           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7831           CombinedInfo.BasePointers.push_back(BP.getPointer());
7832           CombinedInfo.Pointers.push_back(LB.getPointer());
7833           CombinedInfo.Sizes.push_back(
7834               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7835           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7836                                                                     : 1);
7837 
7838           // If Mapper is valid, the last component inherits the mapper.
7839           bool HasMapper = Mapper && Next == CE;
7840           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7841 
7842           // We need to add a pointer flag for each map that comes from the
7843           // same expression except for the first one. We also need to signal
7844           // this map is the first one that relates with the current capture
7845           // (there is a set of entries for each capture).
7846           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7847               MapType, MapModifiers, MotionModifiers, IsImplicit,
7848               !IsExpressionFirstInfo || RequiresReference ||
7849                   FirstPointerInComplexData,
7850               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7851 
7852           if (!IsExpressionFirstInfo) {
7853             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7854             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7855             if (IsPointer)
7856               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7857                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7858 
7859             if (ShouldBeMemberOf) {
7860               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7861               // should be later updated with the correct value of MEMBER_OF.
7862               Flags |= OMP_MAP_MEMBER_OF;
7863               // From now on, all subsequent PTR_AND_OBJ entries should not be
7864               // marked as MEMBER_OF.
7865               ShouldBeMemberOf = false;
7866             }
7867           }
7868 
7869           CombinedInfo.Types.push_back(Flags);
7870         }
7871 
7872         // If we have encountered a member expression so far, keep track of the
7873         // mapped member. If the parent is "*this", then the value declaration
7874         // is nullptr.
7875         if (EncounteredME) {
7876           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7877           unsigned FieldIndex = FD->getFieldIndex();
7878 
7879           // Update info about the lowest and highest elements for this struct
7880           if (!PartialStruct.Base.isValid()) {
7881             PartialStruct.LowestElem = {FieldIndex, LB};
7882             if (IsFinalArraySection) {
7883               Address HB =
7884                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7885                       .getAddress(CGF);
7886               PartialStruct.HighestElem = {FieldIndex, HB};
7887             } else {
7888               PartialStruct.HighestElem = {FieldIndex, LB};
7889             }
7890             PartialStruct.Base = BP;
7891           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7892             PartialStruct.LowestElem = {FieldIndex, LB};
7893           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7894             PartialStruct.HighestElem = {FieldIndex, LB};
7895           }
7896         }
7897 
7898         // Need to emit combined struct for array sections.
7899         if (IsFinalArraySection || IsNonContiguous)
7900           PartialStruct.IsArraySection = true;
7901 
7902         // If we have a final array section, we are done with this expression.
7903         if (IsFinalArraySection)
7904           break;
7905 
7906         // The pointer becomes the base for the next element.
7907         if (Next != CE)
7908           BP = LB;
7909 
7910         IsExpressionFirstInfo = false;
7911         IsCaptureFirstInfo = false;
7912         FirstPointerInComplexData = false;
7913       } else if (FirstPointerInComplexData) {
7914         QualType Ty = Components.rbegin()
7915                           ->getAssociatedDeclaration()
7916                           ->getType()
7917                           .getNonReferenceType();
7918         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7919         FirstPointerInComplexData = false;
7920       }
7921     }
7922 
7923     if (!IsNonContiguous)
7924       return;
7925 
7926     const ASTContext &Context = CGF.getContext();
7927 
7928     // For supporting stride in array section, we need to initialize the first
7929     // dimension size as 1, first offset as 0, and first count as 1
7930     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7931     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7932     MapValuesArrayTy CurStrides;
7933     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7934     uint64_t ElementTypeSize;
7935 
7936     // Collect Size information for each dimension and get the element size as
7937     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7939     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7940          Components) {
7941       const Expr *AssocExpr = Component.getAssociatedExpression();
7942       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7943 
7944       if (!OASE)
7945         continue;
7946 
7947       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7948       auto *CAT = Context.getAsConstantArrayType(Ty);
7949       auto *VAT = Context.getAsVariableArrayType(Ty);
7950 
7951       // We need all the dimension size except for the last dimension.
7952       assert((VAT || CAT || &Component == &*Components.begin()) &&
7953              "Should be either ConstantArray or VariableArray if not the "
7954              "first Component");
7955 
7956       // Get element size if CurStrides is empty.
7957       if (CurStrides.empty()) {
7958         const Type *ElementType = nullptr;
7959         if (CAT)
7960           ElementType = CAT->getElementType().getTypePtr();
7961         else if (VAT)
7962           ElementType = VAT->getElementType().getTypePtr();
7963         else
7964           assert(&Component == &*Components.begin() &&
7965                  "Only expect pointer (non CAT or VAT) when this is the "
7966                  "first Component");
7967         // If ElementType is null, then it means the base is a pointer
7968         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7969         // for next iteration.
7970         if (ElementType) {
7971           // For the case that having pointer as base, we need to remove one
7972           // level of indirection.
7973           if (&Component != &*Components.begin())
7974             ElementType = ElementType->getPointeeOrArrayElementType();
7975           ElementTypeSize =
7976               Context.getTypeSizeInChars(ElementType).getQuantity();
7977           CurStrides.push_back(
7978               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7979         }
7980       }
7981       // Get dimension value except for the last dimension since we don't need
7982       // it.
7983       if (DimSizes.size() < Components.size() - 1) {
7984         if (CAT)
7985           DimSizes.push_back(llvm::ConstantInt::get(
7986               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7987         else if (VAT)
7988           DimSizes.push_back(CGF.Builder.CreateIntCast(
7989               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7990               /*IsSigned=*/false));
7991       }
7992     }
7993 
7994     // Skip the dummy dimension since we have already have its information.
7995     auto DI = DimSizes.begin() + 1;
7996     // Product of dimension.
7997     llvm::Value *DimProd =
7998         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7999 
8000     // Collect info for non-contiguous. Notice that offset, count, and stride
8001     // are only meaningful for array-section, so we insert a null for anything
8002     // other than array-section.
8003     // Also, the size of offset, count, and stride are not the same as
8004     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8005     // count, and stride are the same as the number of non-contiguous
8006     // declaration in target update to/from clause.
8007     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8008          Components) {
8009       const Expr *AssocExpr = Component.getAssociatedExpression();
8010 
8011       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8012         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8013             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8014             /*isSigned=*/false);
8015         CurOffsets.push_back(Offset);
8016         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8017         CurStrides.push_back(CurStrides.back());
8018         continue;
8019       }
8020 
8021       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8022 
8023       if (!OASE)
8024         continue;
8025 
8026       // Offset
8027       const Expr *OffsetExpr = OASE->getLowerBound();
8028       llvm::Value *Offset = nullptr;
8029       if (!OffsetExpr) {
8030         // If offset is absent, then we just set it to zero.
8031         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8032       } else {
8033         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8034                                            CGF.Int64Ty,
8035                                            /*isSigned=*/false);
8036       }
8037       CurOffsets.push_back(Offset);
8038 
8039       // Count
8040       const Expr *CountExpr = OASE->getLength();
8041       llvm::Value *Count = nullptr;
8042       if (!CountExpr) {
8043         // In Clang, once a high dimension is an array section, we construct all
8044         // the lower dimension as array section, however, for case like
8045         // arr[0:2][2], Clang construct the inner dimension as an array section
8046         // but it actually is not in an array section form according to spec.
8047         if (!OASE->getColonLocFirst().isValid() &&
8048             !OASE->getColonLocSecond().isValid()) {
8049           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8050         } else {
8051           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8052           // When the length is absent it defaults to ⌈(size −
8053           // lower-bound)/stride⌉, where size is the size of the array
8054           // dimension.
8055           const Expr *StrideExpr = OASE->getStride();
8056           llvm::Value *Stride =
8057               StrideExpr
8058                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8059                                               CGF.Int64Ty, /*isSigned=*/false)
8060                   : nullptr;
8061           if (Stride)
8062             Count = CGF.Builder.CreateUDiv(
8063                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8064           else
8065             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8066         }
8067       } else {
8068         Count = CGF.EmitScalarExpr(CountExpr);
8069       }
8070       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8071       CurCounts.push_back(Count);
8072 
8073       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8074       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8075       //              Offset      Count     Stride
8076       //    D0          0           1         4    (int)    <- dummy dimension
8077       //    D1          0           2         8    (2 * (1) * 4)
8078       //    D2          1           2         20   (1 * (1 * 5) * 4)
8079       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8080       const Expr *StrideExpr = OASE->getStride();
8081       llvm::Value *Stride =
8082           StrideExpr
8083               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8084                                           CGF.Int64Ty, /*isSigned=*/false)
8085               : nullptr;
8086       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8087       if (Stride)
8088         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8089       else
8090         CurStrides.push_back(DimProd);
8091       if (DI != DimSizes.end())
8092         ++DI;
8093     }
8094 
8095     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8096     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8097     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8098   }
8099 
8100   /// Return the adjusted map modifiers if the declaration a capture refers to
8101   /// appears in a first-private clause. This is expected to be used only with
8102   /// directives that start with 'target'.
8103   MappableExprsHandler::OpenMPOffloadMappingFlags
8104   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8105     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8106 
8107     // A first private variable captured by reference will use only the
8108     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8109     // declaration is known as first-private in this handler.
8110     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8111       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8112           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8113         return MappableExprsHandler::OMP_MAP_ALWAYS |
8114                MappableExprsHandler::OMP_MAP_TO;
8115       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8116         return MappableExprsHandler::OMP_MAP_TO |
8117                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8118       return MappableExprsHandler::OMP_MAP_PRIVATE |
8119              MappableExprsHandler::OMP_MAP_TO;
8120     }
8121     return MappableExprsHandler::OMP_MAP_TO |
8122            MappableExprsHandler::OMP_MAP_FROM;
8123   }
8124 
8125   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8126     // Rotate by getFlagMemberOffset() bits.
8127     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8128                                                   << getFlagMemberOffset());
8129   }
8130 
8131   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8132                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8133     // If the entry is PTR_AND_OBJ but has not been marked with the special
8134     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8135     // marked as MEMBER_OF.
8136     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8137         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8138       return;
8139 
8140     // Reset the placeholder value to prepare the flag for the assignment of the
8141     // proper MEMBER_OF value.
8142     Flags &= ~OMP_MAP_MEMBER_OF;
8143     Flags |= MemberOfFlag;
8144   }
8145 
8146   void getPlainLayout(const CXXRecordDecl *RD,
8147                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8148                       bool AsBase) const {
8149     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8150 
8151     llvm::StructType *St =
8152         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8153 
8154     unsigned NumElements = St->getNumElements();
8155     llvm::SmallVector<
8156         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8157         RecordLayout(NumElements);
8158 
8159     // Fill bases.
8160     for (const auto &I : RD->bases()) {
8161       if (I.isVirtual())
8162         continue;
8163       const auto *Base = I.getType()->getAsCXXRecordDecl();
8164       // Ignore empty bases.
8165       if (Base->isEmpty() || CGF.getContext()
8166                                  .getASTRecordLayout(Base)
8167                                  .getNonVirtualSize()
8168                                  .isZero())
8169         continue;
8170 
8171       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8172       RecordLayout[FieldIndex] = Base;
8173     }
8174     // Fill in virtual bases.
8175     for (const auto &I : RD->vbases()) {
8176       const auto *Base = I.getType()->getAsCXXRecordDecl();
8177       // Ignore empty bases.
8178       if (Base->isEmpty())
8179         continue;
8180       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8181       if (RecordLayout[FieldIndex])
8182         continue;
8183       RecordLayout[FieldIndex] = Base;
8184     }
8185     // Fill in all the fields.
8186     assert(!RD->isUnion() && "Unexpected union.");
8187     for (const auto *Field : RD->fields()) {
8188       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8189       // will fill in later.)
8190       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8191         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8192         RecordLayout[FieldIndex] = Field;
8193       }
8194     }
8195     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8196              &Data : RecordLayout) {
8197       if (Data.isNull())
8198         continue;
8199       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8200         getPlainLayout(Base, Layout, /*AsBase=*/true);
8201       else
8202         Layout.push_back(Data.get<const FieldDecl *>());
8203     }
8204   }
8205 
8206 public:
8207   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8208       : CurDir(&Dir), CGF(CGF) {
8209     // Extract firstprivate clause information.
8210     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8211       for (const auto *D : C->varlists())
8212         FirstPrivateDecls.try_emplace(
8213             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8214     // Extract implicit firstprivates from uses_allocators clauses.
8215     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8216       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8217         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8218         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8219           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8220                                         /*Implicit=*/true);
8221         else if (const auto *VD = dyn_cast<VarDecl>(
8222                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8223                          ->getDecl()))
8224           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8225       }
8226     }
8227     // Extract device pointer clause information.
8228     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8229       for (auto L : C->component_lists())
8230         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8231   }
8232 
  /// Constructor for the declare mapper directive. It only stores \p Dir as
  /// the current directive and keeps a reference to \p CGF; no clause
  /// information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8236 
8237   /// Generate code for the combined entry if we have a partially mapped struct
8238   /// and take care of the mapping flags of the arguments corresponding to
8239   /// individual struct members.
8240   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8241                          MapFlagsArrayTy &CurTypes,
8242                          const StructRangeInfoTy &PartialStruct,
8243                          const ValueDecl *VD = nullptr,
8244                          bool NotTargetParams = true) const {
8245     if (CurTypes.size() == 1 &&
8246         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8247         !PartialStruct.IsArraySection)
8248       return;
8249     CombinedInfo.Exprs.push_back(VD);
8250     // Base is the base of the struct
8251     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8252     // Pointer is the address of the lowest element
8253     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8254     CombinedInfo.Pointers.push_back(LB);
8255     // There should not be a mapper for a combined entry.
8256     CombinedInfo.Mappers.push_back(nullptr);
8257     // Size is (addr of {highest+1} element) - (addr of lowest element)
8258     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8259     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8260     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8261     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8262     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8263     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8264                                                   /*isSigned=*/false);
8265     CombinedInfo.Sizes.push_back(Size);
8266     // Map type is always TARGET_PARAM, if generate info for captures.
8267     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8268                                                  : OMP_MAP_TARGET_PARAM);
8269     // If any element has the present modifier, then make sure the runtime
8270     // doesn't attempt to allocate the struct.
8271     if (CurTypes.end() !=
8272         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8273           return Type & OMP_MAP_PRESENT;
8274         }))
8275       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8276     // Remove TARGET_PARAM flag from the first element if any.
8277     if (!CurTypes.empty())
8278       CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;
8279 
8280     // All other current entries will be MEMBER_OF the combined entry
8281     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8282     // 0xFFFF in the MEMBER_OF field).
8283     OpenMPOffloadMappingFlags MemberOfFlag =
8284         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8285     for (auto &M : CurTypes)
8286       setCorrectMemberOfFlag(M, MemberOfFlag);
8287   }
8288 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// The current directive must be an executable directive. Component lists
  /// are collected from its map, to, from, use_device_ptr and use_device_addr
  /// clauses, grouped by canonical declaration, and then emitted one
  /// declaration at a time.
  ///
  /// \param CombinedInfo Output; the generated entries are appended to it.
  /// \param SkipVarSet Canonical declarations whose component lists are not
  /// added to the per-declaration information map.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The key is the canonical declaration of D (or null when D is
    // null); lists whose key is in SkipVarSet are dropped.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists of the map clauses. EI walks the clause's
    // variable references in step with the component lists.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect the component lists of the 'to' clauses; they are recorded with
    // map type 'to' and carry motion modifiers instead of map modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect the component lists of the 'from' clauses; they are recorded
    // with map type 'from'.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Entries for use_device_ptr items without map information; appended to
    // CombinedInfo at the very end.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            // PrevCI is the component right after the base declaration, if
            // there is one.
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Non-member pointer: load its value and record a zero-size
          // RETURN_PARAM entry whose base pointer and pointer are both that
          // value.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    // Each declaration is handled at most once across all use_device_addr
    // clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Non-member item: use the address of a glvalue, or the scalar
          // value otherwise, and record a zero-size RETURN_PARAM entry
          // directly in CombinedInfo.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the entries one declaration at a time, in the order the
    // declarations were first inserted into Info.
    for (const auto &M : Info) {
      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit,
            L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // The flag goes on the first entry generated for this component
          // list.
          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            // use_device_addr: base pointer and pointer are the same value.
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base pointer is the address of the member
            // and the pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8590 
8591   /// Generate all the base pointers, section pointers, sizes, map types, and
8592   /// mappers for the extracted map clauses of user-defined mapper (all included
8593   /// in \a CombinedInfo).
8594   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8595     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8596            "Expect a declare mapper directive");
8597     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8598     // We have to process the component lists that relate with the same
8599     // declaration in a single chunk so that we can generate the map flags
8600     // correctly. Therefore, we organize all lists in a map.
8601     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8602 
8603     // Fill the information map for map clauses.
8604     for (const auto *C : CurMapperDir->clauselists()) {
8605       const auto *MC = cast<OMPMapClause>(C);
8606       const auto *EI = MC->getVarRefs().begin();
8607       for (const auto L : MC->component_lists()) {
8608         // The Expression is not correct if the mapping is implicit
8609         const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
8610         const ValueDecl *VD =
8611             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8612                            : nullptr;
8613         // Get the corresponding user-defined mapper.
8614         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8615                               MC->getMapTypeModifiers(), llvm::None,
8616                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8617                               std::get<2>(L), E);
8618         ++EI;
8619       }
8620     }
8621 
8622     for (const auto &M : Info) {
8623       // We need to know when we generate information for the first component
8624       // associated with a capture, because the mapping flags depend on it.
8625       bool IsFirstComponentList = true;
8626 
8627       // Underlying variable declaration used in the map clause.
8628       const ValueDecl *VD = std::get<0>(M);
8629 
8630       // Temporary generated information.
8631       MapCombinedInfoTy CurInfo;
8632       StructRangeInfoTy PartialStruct;
8633 
8634       for (const MapInfo &L : M.second) {
8635         assert(!L.Components.empty() &&
8636                "Not expecting declaration with no component lists.");
8637         generateInfoForComponentList(
8638             L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
8639             PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
8640             L.ForDeviceAddr, VD, L.VarRef);
8641         IsFirstComponentList = false;
8642       }
8643 
8644       // If there is an entry in PartialStruct it means we have a struct with
8645       // individual members mapped. Emit an extra combined entry.
8646       if (PartialStruct.Base.isValid()) {
8647         CurInfo.NonContigInfo.Dims.push_back(0);
8648         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8649       }
8650 
8651       // We need to append the results of this capture to what we already have.
8652       CombinedInfo.append(CurInfo);
8653     }
8654   }
8655 
8656   /// Emit capture info for lambdas for variables captured by reference.
8657   void generateInfoForLambdaCaptures(
8658       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8659       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8660     const auto *RD = VD->getType()
8661                          .getCanonicalType()
8662                          .getNonReferenceType()
8663                          ->getAsCXXRecordDecl();
8664     if (!RD || !RD->isLambda())
8665       return;
8666     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8667     LValue VDLVal = CGF.MakeAddrLValue(
8668         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8669     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8670     FieldDecl *ThisCapture = nullptr;
8671     RD->getCaptureFields(Captures, ThisCapture);
8672     if (ThisCapture) {
8673       LValue ThisLVal =
8674           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8675       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8676       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8677                                  VDLVal.getPointer(CGF));
8678       CombinedInfo.Exprs.push_back(VD);
8679       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8680       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8681       CombinedInfo.Sizes.push_back(
8682           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8683                                     CGF.Int64Ty, /*isSigned=*/true));
8684       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8685                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8686       CombinedInfo.Mappers.push_back(nullptr);
8687     }
8688     for (const LambdaCapture &LC : RD->captures()) {
8689       if (!LC.capturesVariable())
8690         continue;
8691       const VarDecl *VD = LC.getCapturedVar();
8692       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8693         continue;
8694       auto It = Captures.find(VD);
8695       assert(It != Captures.end() && "Found lambda capture without field.");
8696       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8697       if (LC.getCaptureKind() == LCK_ByRef) {
8698         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8699         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8700                                    VDLVal.getPointer(CGF));
8701         CombinedInfo.Exprs.push_back(VD);
8702         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8703         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8704         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8705             CGF.getTypeSize(
8706                 VD->getType().getCanonicalType().getNonReferenceType()),
8707             CGF.Int64Ty, /*isSigned=*/true));
8708       } else {
8709         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8710         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8711                                    VDLVal.getPointer(CGF));
8712         CombinedInfo.Exprs.push_back(VD);
8713         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8714         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8715         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8716       }
8717       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8718                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8719       CombinedInfo.Mappers.push_back(nullptr);
8720     }
8721   }
8722 
8723   /// Set correct indices for lambdas captures.
8724   void adjustMemberOfForLambdaCaptures(
8725       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8726       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8727       MapFlagsArrayTy &Types) const {
8728     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8729       // Set correct member_of idx for all implicit lambda captures.
8730       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8731                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8732         continue;
8733       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8734       assert(BasePtr && "Unable to find base lambda address.");
8735       int TgtIdx = -1;
8736       for (unsigned J = I; J > 0; --J) {
8737         unsigned Idx = J - 1;
8738         if (Pointers[Idx] != BasePtr)
8739           continue;
8740         TgtIdx = Idx;
8741         break;
8742       }
8743       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8744       // All other current entries will be MEMBER_OF the combined entry
8745       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8746       // 0xFFFF in the MEMBER_OF field).
8747       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8748       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8749     }
8750   }
8751 
8752   /// Generate the base pointers, section pointers, sizes, map types, and
8753   /// mappers associated to a given capture (all included in \a CombinedInfo).
8754   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8755                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8756                               StructRangeInfoTy &PartialStruct) const {
8757     assert(!Cap->capturesVariableArrayType() &&
8758            "Not expecting to generate map info for a variable array type!");
8759 
8760     // We need to know when we generating information for the first component
8761     const ValueDecl *VD = Cap->capturesThis()
8762                               ? nullptr
8763                               : Cap->getCapturedVar()->getCanonicalDecl();
8764 
8765     // If this declaration appears in a is_device_ptr clause we just have to
8766     // pass the pointer by value. If it is a reference to a declaration, we just
8767     // pass its value.
8768     if (DevPointersMap.count(VD)) {
8769       CombinedInfo.Exprs.push_back(VD);
8770       CombinedInfo.BasePointers.emplace_back(Arg, VD);
8771       CombinedInfo.Pointers.push_back(Arg);
8772       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8773           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8774           /*isSigned=*/true));
8775       CombinedInfo.Types.push_back(
8776           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
8777           OMP_MAP_TARGET_PARAM);
8778       CombinedInfo.Mappers.push_back(nullptr);
8779       return;
8780     }
8781 
8782     using MapData =
8783         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8784                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8785                    const ValueDecl *, const Expr *>;
8786     SmallVector<MapData, 4> DeclComponentLists;
8787     assert(CurDir.is<const OMPExecutableDirective *>() &&
8788            "Expect a executable directive");
8789     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8790     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8791       const auto *EI = C->getVarRefs().begin();
8792       for (const auto L : C->decl_component_lists(VD)) {
8793         const ValueDecl *VDecl, *Mapper;
8794         // The Expression is not correct if the mapping is implicit
8795         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8796         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8797         std::tie(VDecl, Components, Mapper) = L;
8798         assert(VDecl == VD && "We got information for the wrong declaration??");
8799         assert(!Components.empty() &&
8800                "Not expecting declaration with no component lists.");
8801         DeclComponentLists.emplace_back(Components, C->getMapType(),
8802                                         C->getMapTypeModifiers(),
8803                                         C->isImplicit(), Mapper, E);
8804         ++EI;
8805       }
8806     }
8807 
8808     // Find overlapping elements (including the offset from the base element).
8809     llvm::SmallDenseMap<
8810         const MapData *,
8811         llvm::SmallVector<
8812             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8813         4>
8814         OverlappedData;
8815     size_t Count = 0;
8816     for (const MapData &L : DeclComponentLists) {
8817       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8818       OpenMPMapClauseKind MapType;
8819       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8820       bool IsImplicit;
8821       const ValueDecl *Mapper;
8822       const Expr *VarRef;
8823       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8824           L;
8825       ++Count;
8826       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8827         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8828         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8829                  VarRef) = L1;
8830         auto CI = Components.rbegin();
8831         auto CE = Components.rend();
8832         auto SI = Components1.rbegin();
8833         auto SE = Components1.rend();
8834         for (; CI != CE && SI != SE; ++CI, ++SI) {
8835           if (CI->getAssociatedExpression()->getStmtClass() !=
8836               SI->getAssociatedExpression()->getStmtClass())
8837             break;
8838           // Are we dealing with different variables/fields?
8839           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8840             break;
8841         }
8842         // Found overlapping if, at least for one component, reached the head of
8843         // the components list.
8844         if (CI == CE || SI == SE) {
8845           assert((CI != CE || SI != SE) &&
8846                  "Unexpected full match of the mapping components.");
8847           const MapData &BaseData = CI == CE ? L : L1;
8848           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8849               SI == SE ? Components : Components1;
8850           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8851           OverlappedElements.getSecond().push_back(SubData);
8852         }
8853       }
8854     }
8855     // Sort the overlapped elements for each item.
8856     llvm::SmallVector<const FieldDecl *, 4> Layout;
8857     if (!OverlappedData.empty()) {
8858       if (const auto *CRD =
8859               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8860         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8861       else {
8862         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8863         Layout.append(RD->field_begin(), RD->field_end());
8864       }
8865     }
8866     for (auto &Pair : OverlappedData) {
8867       llvm::sort(
8868           Pair.getSecond(),
8869           [&Layout](
8870               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8871               OMPClauseMappableExprCommon::MappableExprComponentListRef
8872                   Second) {
8873             auto CI = First.rbegin();
8874             auto CE = First.rend();
8875             auto SI = Second.rbegin();
8876             auto SE = Second.rend();
8877             for (; CI != CE && SI != SE; ++CI, ++SI) {
8878               if (CI->getAssociatedExpression()->getStmtClass() !=
8879                   SI->getAssociatedExpression()->getStmtClass())
8880                 break;
8881               // Are we dealing with different variables/fields?
8882               if (CI->getAssociatedDeclaration() !=
8883                   SI->getAssociatedDeclaration())
8884                 break;
8885             }
8886 
8887             // Lists contain the same elements.
8888             if (CI == CE && SI == SE)
8889               return false;
8890 
8891             // List with less elements is less than list with more elements.
8892             if (CI == CE || SI == SE)
8893               return CI == CE;
8894 
8895             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8896             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8897             if (FD1->getParent() == FD2->getParent())
8898               return FD1->getFieldIndex() < FD2->getFieldIndex();
8899             const auto It =
8900                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8901                   return FD == FD1 || FD == FD2;
8902                 });
8903             return *It == FD1;
8904           });
8905     }
8906 
8907     // Associated with a capture, because the mapping flags depend on it.
8908     // Go through all of the elements with the overlapped elements.
8909     for (const auto &Pair : OverlappedData) {
8910       const MapData &L = *Pair.getFirst();
8911       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8912       OpenMPMapClauseKind MapType;
8913       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8914       bool IsImplicit;
8915       const ValueDecl *Mapper;
8916       const Expr *VarRef;
8917       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8918           L;
8919       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8920           OverlappedComponents = Pair.getSecond();
8921       bool IsFirstComponentList = true;
8922       generateInfoForComponentList(
8923           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
8924           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8925           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8926     }
8927     // Go through other elements without overlapped elements.
8928     bool IsFirstComponentList = OverlappedData.empty();
8929     for (const MapData &L : DeclComponentLists) {
8930       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8931       OpenMPMapClauseKind MapType;
8932       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8933       bool IsImplicit;
8934       const ValueDecl *Mapper;
8935       const Expr *VarRef;
8936       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8937           L;
8938       auto It = OverlappedData.find(&L);
8939       if (It == OverlappedData.end())
8940         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
8941                                      Components, CombinedInfo, PartialStruct,
8942                                      IsFirstComponentList, IsImplicit, Mapper,
8943                                      /*ForDeviceAddr=*/false, VD, VarRef);
8944       IsFirstComponentList = false;
8945     }
8946   }
8947 
8948   /// Generate the default map information for a given capture \a CI,
8949   /// record field declaration \a RI and captured value \a CV.
8950   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8951                               const FieldDecl &RI, llvm::Value *CV,
8952                               MapCombinedInfoTy &CombinedInfo) const {
8953     bool IsImplicit = true;
8954     // Do the default mapping.
8955     if (CI.capturesThis()) {
8956       CombinedInfo.Exprs.push_back(nullptr);
8957       CombinedInfo.BasePointers.push_back(CV);
8958       CombinedInfo.Pointers.push_back(CV);
8959       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8960       CombinedInfo.Sizes.push_back(
8961           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8962                                     CGF.Int64Ty, /*isSigned=*/true));
8963       // Default map type.
8964       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8965     } else if (CI.capturesVariableByCopy()) {
8966       const VarDecl *VD = CI.getCapturedVar();
8967       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8968       CombinedInfo.BasePointers.push_back(CV);
8969       CombinedInfo.Pointers.push_back(CV);
8970       if (!RI.getType()->isAnyPointerType()) {
8971         // We have to signal to the runtime captures passed by value that are
8972         // not pointers.
8973         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
8974         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8975             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8976       } else {
8977         // Pointers are implicitly mapped with a zero size and no flags
8978         // (other than first map that is added for all implicit maps).
8979         CombinedInfo.Types.push_back(OMP_MAP_NONE);
8980         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8981       }
8982       auto I = FirstPrivateDecls.find(VD);
8983       if (I != FirstPrivateDecls.end())
8984         IsImplicit = I->getSecond();
8985     } else {
8986       assert(CI.capturesVariable() && "Expected captured reference.");
8987       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8988       QualType ElementType = PtrTy->getPointeeType();
8989       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8990           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8991       // The default map type for a scalar/complex type is 'to' because by
8992       // default the value doesn't have to be retrieved. For an aggregate
8993       // type, the default is 'tofrom'.
8994       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8995       const VarDecl *VD = CI.getCapturedVar();
8996       auto I = FirstPrivateDecls.find(VD);
8997       if (I != FirstPrivateDecls.end() &&
8998           VD->getType().isConstant(CGF.getContext())) {
8999         llvm::Constant *Addr =
9000             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9001         // Copy the value of the original variable to the new global copy.
9002         CGF.Builder.CreateMemCpy(
9003             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9004             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9005             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9006         // Use new global variable as the base pointers.
9007         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9008         CombinedInfo.BasePointers.push_back(Addr);
9009         CombinedInfo.Pointers.push_back(Addr);
9010       } else {
9011         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9012         CombinedInfo.BasePointers.push_back(CV);
9013         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9014           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9015               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9016               AlignmentSource::Decl));
9017           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9018         } else {
9019           CombinedInfo.Pointers.push_back(CV);
9020         }
9021       }
9022       if (I != FirstPrivateDecls.end())
9023         IsImplicit = I->getSecond();
9024     }
9025     // Every default map produces a single argument which is a target parameter.
9026     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9027 
9028     // Add flag stating this is an implicit map.
9029     if (IsImplicit)
9030       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9031 
9032     // No user-defined mapper for default mapping.
9033     CombinedInfo.Mappers.push_back(nullptr);
9034   }
9035 };
9036 } // anonymous namespace
9037 
9038 static void emitNonContiguousDescriptor(
9039     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9040     CGOpenMPRuntime::TargetDataInfo &Info) {
9041   CodeGenModule &CGM = CGF.CGM;
9042   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9043       &NonContigInfo = CombinedInfo.NonContigInfo;
9044 
9045   // Build an array of struct descriptor_dim and then assign it to
9046   // offload_args.
9047   //
9048   // struct descriptor_dim {
9049   //  uint64_t offset;
9050   //  uint64_t count;
9051   //  uint64_t stride
9052   // };
9053   ASTContext &C = CGF.getContext();
9054   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9055   RecordDecl *RD;
9056   RD = C.buildImplicitRecord("descriptor_dim");
9057   RD->startDefinition();
9058   addFieldToRecordDecl(C, RD, Int64Ty);
9059   addFieldToRecordDecl(C, RD, Int64Ty);
9060   addFieldToRecordDecl(C, RD, Int64Ty);
9061   RD->completeDefinition();
9062   QualType DimTy = C.getRecordType(RD);
9063 
9064   enum { OffsetFD = 0, CountFD, StrideFD };
9065   // We need two index variable here since the size of "Dims" is the same as the
9066   // size of Components, however, the size of offset, count, and stride is equal
9067   // to the size of base declaration that is non-contiguous.
9068   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9069     // Skip emitting ir if dimension size is 1 since it cannot be
9070     // non-contiguous.
9071     if (NonContigInfo.Dims[I] == 1)
9072       continue;
9073     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9074     QualType ArrayTy =
9075         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9076     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9077     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9078       unsigned RevIdx = EE - II - 1;
9079       LValue DimsLVal = CGF.MakeAddrLValue(
9080           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9081       // Offset
9082       LValue OffsetLVal = CGF.EmitLValueForField(
9083           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9084       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9085       // Count
9086       LValue CountLVal = CGF.EmitLValueForField(
9087           DimsLVal, *std::next(RD->field_begin(), CountFD));
9088       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9089       // Stride
9090       LValue StrideLVal = CGF.EmitLValueForField(
9091           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9092       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9093     }
9094     // args[I] = &dims
9095     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9096         DimsAddr, CGM.Int8PtrTy);
9097     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9098         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9099         Info.PointersArray, 0, I);
9100     Address PAddr(P, CGF.getPointerAlign());
9101     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9102     ++L;
9103   }
9104 }
9105 
9106 /// Emit a string constant containing the names of the values mapped to the
9107 /// offloading runtime library.
9108 llvm::Constant *
9109 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9110                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9111   llvm::Constant *SrcLocStr;
9112   if (!MapExprs.getMapDecl()) {
9113     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9114   } else {
9115     std::string ExprName = "";
9116     if (MapExprs.getMapExpr()) {
9117       PrintingPolicy P(CGF.getContext().getLangOpts());
9118       llvm::raw_string_ostream OS(ExprName);
9119       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9120       OS.flush();
9121     } else {
9122       ExprName = MapExprs.getMapDecl()->getNameAsString();
9123     }
9124 
9125     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9126     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9127     const char *FileName = PLoc.getFilename();
9128     unsigned Line = PLoc.getLine();
9129     unsigned Column = PLoc.getColumn();
9130     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9131                                                 Line, Column);
9132   }
9133 
9134   return SrcLocStr;
9135 }
9136 
9137 /// Emit the arrays used to pass the captures and map information to the
9138 /// offloading runtime library. If there is no map or capture information,
9139 /// return nullptr by reference.
9140 static void emitOffloadingArrays(
9141     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9142     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9143     bool IsNonContiguous = false) {
9144   CodeGenModule &CGM = CGF.CGM;
9145   ASTContext &Ctx = CGF.getContext();
9146 
9147   // Reset the array information.
9148   Info.clearArrayInfo();
9149   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9150 
9151   if (Info.NumberOfPtrs) {
9152     // Detect if we have any capture size requiring runtime evaluation of the
9153     // size so that a constant array could be eventually used.
9154     bool hasRuntimeEvaluationCaptureSize = false;
9155     for (llvm::Value *S : CombinedInfo.Sizes)
9156       if (!isa<llvm::Constant>(S)) {
9157         hasRuntimeEvaluationCaptureSize = true;
9158         break;
9159       }
9160 
9161     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9162     QualType PointerArrayType = Ctx.getConstantArrayType(
9163         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9164         /*IndexTypeQuals=*/0);
9165 
9166     Info.BasePointersArray =
9167         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9168     Info.PointersArray =
9169         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9170     Address MappersArray =
9171         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9172     Info.MappersArray = MappersArray.getPointer();
9173 
9174     // If we don't have any VLA types or other types that require runtime
9175     // evaluation, we can use a constant array for the map sizes, otherwise we
9176     // need to fill up the arrays as we do for the pointers.
9177     QualType Int64Ty =
9178         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9179     if (hasRuntimeEvaluationCaptureSize) {
9180       QualType SizeArrayType = Ctx.getConstantArrayType(
9181           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9182           /*IndexTypeQuals=*/0);
9183       Info.SizesArray =
9184           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9185     } else {
9186       // We expect all the sizes to be constant, so we collect them to create
9187       // a constant array.
9188       SmallVector<llvm::Constant *, 16> ConstSizes;
9189       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9190         if (IsNonContiguous &&
9191             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9192           ConstSizes.push_back(llvm::ConstantInt::get(
9193               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9194         } else {
9195           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9196         }
9197       }
9198 
9199       auto *SizesArrayInit = llvm::ConstantArray::get(
9200           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9201       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9202       auto *SizesArrayGbl = new llvm::GlobalVariable(
9203           CGM.getModule(), SizesArrayInit->getType(),
9204           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9205           SizesArrayInit, Name);
9206       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9207       Info.SizesArray = SizesArrayGbl;
9208     }
9209 
9210     // The map types are always constant so we don't need to generate code to
9211     // fill arrays. Instead, we create an array constant.
9212     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9213     llvm::copy(CombinedInfo.Types, Mapping.begin());
9214     llvm::Constant *MapTypesArrayInit =
9215         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9216     std::string MaptypesName =
9217         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9218     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9219         CGM.getModule(), MapTypesArrayInit->getType(),
9220         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9221         MapTypesArrayInit, MaptypesName);
9222     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9223     Info.MapTypesArray = MapTypesArrayGbl;
9224 
9225     // The information types are only built if there is debug information
9226     // requested.
9227     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9228       Info.MapNamesArray = llvm::Constant::getNullValue(
9229           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9230     } else {
9231       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9232         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9233       };
9234       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9235       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9236 
9237       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9238           llvm::ArrayType::get(
9239               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9240               CombinedInfo.Exprs.size()),
9241           InfoMap);
9242       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9243           CGM.getModule(), MapNamesArrayInit->getType(),
9244           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9245           MapNamesArrayInit,
9246           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9247       Info.MapNamesArray = MapNamesArrayGbl;
9248     }
9249 
9250     // If there's a present map type modifier, it must not be applied to the end
9251     // of a region, so generate a separate map type array in that case.
9252     if (Info.separateBeginEndCalls()) {
9253       bool EndMapTypesDiffer = false;
9254       for (uint64_t &Type : Mapping) {
9255         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9256           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9257           EndMapTypesDiffer = true;
9258         }
9259       }
9260       if (EndMapTypesDiffer) {
9261         MapTypesArrayInit =
9262             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9263         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9264         MapTypesArrayGbl = new llvm::GlobalVariable(
9265             CGM.getModule(), MapTypesArrayInit->getType(),
9266             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9267             MapTypesArrayInit, MaptypesName);
9268         MapTypesArrayGbl->setUnnamedAddr(
9269             llvm::GlobalValue::UnnamedAddr::Global);
9270         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9271       }
9272     }
9273 
9274     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9275       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9276       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9277           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9278           Info.BasePointersArray, 0, I);
9279       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9280           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9281       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9282       CGF.Builder.CreateStore(BPVal, BPAddr);
9283 
9284       if (Info.requiresDevicePointerInfo())
9285         if (const ValueDecl *DevVD =
9286                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9287           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9288 
9289       llvm::Value *PVal = CombinedInfo.Pointers[I];
9290       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9291           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9292           Info.PointersArray, 0, I);
9293       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9294           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9295       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9296       CGF.Builder.CreateStore(PVal, PAddr);
9297 
9298       if (hasRuntimeEvaluationCaptureSize) {
9299         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9300             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9301             Info.SizesArray,
9302             /*Idx0=*/0,
9303             /*Idx1=*/I);
9304         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9305         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9306                                                           CGM.Int64Ty,
9307                                                           /*isSigned=*/true),
9308                                 SAddr);
9309       }
9310 
9311       // Fill up the mapper array.
9312       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9313       if (CombinedInfo.Mappers[I]) {
9314         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9315             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9316         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9317         Info.HasMapper = true;
9318       }
9319       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9320       CGF.Builder.CreateStore(MFunc, MAddr);
9321     }
9322   }
9323 
9324   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9325       Info.NumberOfPtrs == 0)
9326     return;
9327 
9328   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9329 }
9330 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When set, the arguments are emitted for the runtime call that ends a
  /// region instead of the call that begins it.
  bool ForEndCall;

  /// Deliberately not explicit, so that both a default-constructed object and
  /// a plain bool can be used wherever the options are expected.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9339 
9340 /// Emit the arguments to be passed to the runtime library based on the
9341 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9342 /// ForEndCall, emit map types to be passed for the end of the region instead of
9343 /// the beginning.
9344 static void emitOffloadingArraysArgument(
9345     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9346     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9347     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9348     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9349     const ArgumentsOptions &Options = ArgumentsOptions()) {
9350   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9351          "expected region end call to runtime only when end call is separate");
9352   CodeGenModule &CGM = CGF.CGM;
9353   if (Info.NumberOfPtrs) {
9354     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9355         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9356         Info.BasePointersArray,
9357         /*Idx0=*/0, /*Idx1=*/0);
9358     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9359         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9360         Info.PointersArray,
9361         /*Idx0=*/0,
9362         /*Idx1=*/0);
9363     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9364         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9365         /*Idx0=*/0, /*Idx1=*/0);
9366     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9367         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9368         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9369                                                     : Info.MapTypesArray,
9370         /*Idx0=*/0,
9371         /*Idx1=*/0);
9372 
9373     // Only emit the mapper information arrays if debug information is
9374     // requested.
9375     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9376       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9377     else
9378       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9379           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9380           Info.MapNamesArray,
9381           /*Idx0=*/0,
9382           /*Idx1=*/0);
9383     // If there is no user-defined mapper, set the mapper array to nullptr to
9384     // avoid an unnecessary data privatization
9385     if (!Info.HasMapper)
9386       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9387     else
9388       MappersArrayArg =
9389           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9390   } else {
9391     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9392     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9393     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9394     MapTypesArrayArg =
9395         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9396     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9397     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9398   }
9399 }
9400 
9401 /// Check for inner distribute directive.
9402 static const OMPExecutableDirective *
9403 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9404   const auto *CS = D.getInnermostCapturedStmt();
9405   const auto *Body =
9406       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9407   const Stmt *ChildStmt =
9408       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9409 
9410   if (const auto *NestedDir =
9411           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9412     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9413     switch (D.getDirectiveKind()) {
9414     case OMPD_target:
9415       if (isOpenMPDistributeDirective(DKind))
9416         return NestedDir;
9417       if (DKind == OMPD_teams) {
9418         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9419             /*IgnoreCaptured=*/true);
9420         if (!Body)
9421           return nullptr;
9422         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9423         if (const auto *NND =
9424                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9425           DKind = NND->getDirectiveKind();
9426           if (isOpenMPDistributeDirective(DKind))
9427             return NND;
9428         }
9429       }
9430       return nullptr;
9431     case OMPD_target_teams:
9432       if (isOpenMPDistributeDirective(DKind))
9433         return NestedDir;
9434       return nullptr;
9435     case OMPD_target_parallel:
9436     case OMPD_target_simd:
9437     case OMPD_target_parallel_for:
9438     case OMPD_target_parallel_for_simd:
9439       return nullptr;
9440     case OMPD_target_teams_distribute:
9441     case OMPD_target_teams_distribute_simd:
9442     case OMPD_target_teams_distribute_parallel_for:
9443     case OMPD_target_teams_distribute_parallel_for_simd:
9444     case OMPD_parallel:
9445     case OMPD_for:
9446     case OMPD_parallel_for:
9447     case OMPD_parallel_master:
9448     case OMPD_parallel_sections:
9449     case OMPD_for_simd:
9450     case OMPD_parallel_for_simd:
9451     case OMPD_cancel:
9452     case OMPD_cancellation_point:
9453     case OMPD_ordered:
9454     case OMPD_threadprivate:
9455     case OMPD_allocate:
9456     case OMPD_task:
9457     case OMPD_simd:
9458     case OMPD_sections:
9459     case OMPD_section:
9460     case OMPD_single:
9461     case OMPD_master:
9462     case OMPD_critical:
9463     case OMPD_taskyield:
9464     case OMPD_barrier:
9465     case OMPD_taskwait:
9466     case OMPD_taskgroup:
9467     case OMPD_atomic:
9468     case OMPD_flush:
9469     case OMPD_depobj:
9470     case OMPD_scan:
9471     case OMPD_teams:
9472     case OMPD_target_data:
9473     case OMPD_target_exit_data:
9474     case OMPD_target_enter_data:
9475     case OMPD_distribute:
9476     case OMPD_distribute_simd:
9477     case OMPD_distribute_parallel_for:
9478     case OMPD_distribute_parallel_for_simd:
9479     case OMPD_teams_distribute:
9480     case OMPD_teams_distribute_simd:
9481     case OMPD_teams_distribute_parallel_for:
9482     case OMPD_teams_distribute_parallel_for_simd:
9483     case OMPD_target_update:
9484     case OMPD_declare_simd:
9485     case OMPD_declare_variant:
9486     case OMPD_begin_declare_variant:
9487     case OMPD_end_declare_variant:
9488     case OMPD_declare_target:
9489     case OMPD_end_declare_target:
9490     case OMPD_declare_reduction:
9491     case OMPD_declare_mapper:
9492     case OMPD_taskloop:
9493     case OMPD_taskloop_simd:
9494     case OMPD_master_taskloop:
9495     case OMPD_master_taskloop_simd:
9496     case OMPD_parallel_master_taskloop:
9497     case OMPD_parallel_master_taskloop_simd:
9498     case OMPD_requires:
9499     case OMPD_unknown:
9500     default:
9501       llvm_unreachable("Unexpected directive.");
9502     }
9503   }
9504 
9505   return nullptr;
9506 }
9507 
9508 /// Emit the user-defined mapper function. The code generation follows the
9509 /// pattern in the example below.
9510 /// \code
9511 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9512 ///                                           void *base, void *begin,
9513 ///                                           int64_t size, int64_t type,
9514 ///                                           void *name = nullptr) {
9515 ///   // Allocate space for an array section first.
9516 ///   if (size > 1 && !maptype.IsDelete)
9517 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9518 ///                                 size*sizeof(Ty), clearToFrom(type));
9519 ///   // Map members.
9520 ///   for (unsigned i = 0; i < size; i++) {
9521 ///     // For each component specified by this mapper:
9522 ///     for (auto c : all_components) {
9523 ///       if (c.hasMapper())
9524 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9525 ///                       c.arg_type, c.arg_name);
9526 ///       else
9527 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9528 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9529 ///                                     c.arg_name);
9530 ///     }
9531 ///   }
9532 ///   // Delete the array section.
9533 ///   if (size > 1 && maptype.IsDelete)
9534 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9535 ///                                 size*sizeof(Ty), clearToFrom(type));
9536 /// }
9537 /// \endcode
9538 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9539                                             CodeGenFunction *CGF) {
9540   if (UDMMap.count(D) > 0)
9541     return;
9542   ASTContext &C = CGM.getContext();
9543   QualType Ty = D->getType();
9544   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9545   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9546   auto *MapperVarDecl =
9547       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9548   SourceLocation Loc = D->getLocation();
9549   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9550 
9551   // Prepare mapper function arguments and attributes.
9552   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9553                               C.VoidPtrTy, ImplicitParamDecl::Other);
9554   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9555                             ImplicitParamDecl::Other);
9556   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9557                              C.VoidPtrTy, ImplicitParamDecl::Other);
9558   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9559                             ImplicitParamDecl::Other);
9560   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9561                             ImplicitParamDecl::Other);
9562   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9563                             ImplicitParamDecl::Other);
9564   FunctionArgList Args;
9565   Args.push_back(&HandleArg);
9566   Args.push_back(&BaseArg);
9567   Args.push_back(&BeginArg);
9568   Args.push_back(&SizeArg);
9569   Args.push_back(&TypeArg);
9570   Args.push_back(&NameArg);
9571   const CGFunctionInfo &FnInfo =
9572       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9573   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9574   SmallString<64> TyStr;
9575   llvm::raw_svector_ostream Out(TyStr);
9576   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9577   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9578   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9579                                     Name, &CGM.getModule());
9580   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9581   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9582   // Start the mapper function code generation.
9583   CodeGenFunction MapperCGF(CGM);
9584   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9585   // Compute the starting and end addreses of array elements.
9586   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9587       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9588       C.getPointerType(Int64Ty), Loc);
9589   // Convert the size in bytes into the number of array elements.
9590   Size = MapperCGF.Builder.CreateExactUDiv(
9591       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9592   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9593       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9594       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9595   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9596   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9597       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9598       C.getPointerType(Int64Ty), Loc);
9599   // Prepare common arguments for array initiation and deletion.
9600   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9601       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9602       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9603   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9604       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9605       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9606   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9607       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9608       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9609 
9610   // Emit array initiation if this is an array section and \p MapType indicates
9611   // that memory allocation is required.
9612   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9613   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9614                              ElementSize, HeadBB, /*IsInit=*/true);
9615 
9616   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9617 
9618   // Emit the loop header block.
9619   MapperCGF.EmitBlock(HeadBB);
9620   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9621   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9622   // Evaluate whether the initial condition is satisfied.
9623   llvm::Value *IsEmpty =
9624       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9625   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9626   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9627 
9628   // Emit the loop body block.
9629   MapperCGF.EmitBlock(BodyBB);
9630   llvm::BasicBlock *LastBB = BodyBB;
9631   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9632       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9633   PtrPHI->addIncoming(PtrBegin, EntryBB);
9634   Address PtrCurrent =
9635       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9636                           .getAlignment()
9637                           .alignmentOfArrayElement(ElementSize));
9638   // Privatize the declared variable of mapper to be the current array element.
9639   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9640   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9641     return MapperCGF
9642         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9643         .getAddress(MapperCGF);
9644   });
9645   (void)Scope.Privatize();
9646 
9647   // Get map clause information. Fill up the arrays with all mapped variables.
9648   MappableExprsHandler::MapCombinedInfoTy Info;
9649   MappableExprsHandler MEHandler(*D, MapperCGF);
9650   MEHandler.generateAllInfoForMapper(Info);
9651 
9652   // Call the runtime API __tgt_mapper_num_components to get the number of
9653   // pre-existing components.
9654   llvm::Value *OffloadingArgs[] = {Handle};
9655   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9656       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9657                                             OMPRTL___tgt_mapper_num_components),
9658       OffloadingArgs);
9659   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9660       PreviousSize,
9661       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9662 
9663   // Fill up the runtime mapper handle for all components.
9664   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9665     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9666         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9667     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9668         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9669     llvm::Value *CurSizeArg = Info.Sizes[I];
9670     llvm::Value *CurNameArg =
9671         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9672             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9673             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9674 
9675     // Extract the MEMBER_OF field from the map type.
9676     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9677     MapperCGF.EmitBlock(MemberBB);
9678     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9679     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9680         OriMapType,
9681         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9682     llvm::BasicBlock *MemberCombineBB =
9683         MapperCGF.createBasicBlock("omp.member.combine");
9684     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9685     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9686     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9687     // Add the number of pre-existing components to the MEMBER_OF field if it
9688     // is valid.
9689     MapperCGF.EmitBlock(MemberCombineBB);
9690     llvm::Value *CombinedMember =
9691         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9692     // Do nothing if it is not a member of previous components.
9693     MapperCGF.EmitBlock(TypeBB);
9694     llvm::PHINode *MemberMapType =
9695         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9696     MemberMapType->addIncoming(OriMapType, MemberBB);
9697     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9698 
9699     // Combine the map type inherited from user-defined mapper with that
9700     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9701     // bits of the \a MapType, which is the input argument of the mapper
9702     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9703     // bits of MemberMapType.
9704     // [OpenMP 5.0], 1.2.6. map-type decay.
9705     //        | alloc |  to   | from  | tofrom | release | delete
9706     // ----------------------------------------------------------
9707     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9708     // to     | alloc |  to   | alloc |   to   | release | delete
9709     // from   | alloc | alloc | from  |  from  | release | delete
9710     // tofrom | alloc |  to   | from  | tofrom | release | delete
9711     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9712         MapType,
9713         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9714                                    MappableExprsHandler::OMP_MAP_FROM));
9715     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9716     llvm::BasicBlock *AllocElseBB =
9717         MapperCGF.createBasicBlock("omp.type.alloc.else");
9718     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9719     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9720     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9721     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9722     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9723     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9724     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9725     MapperCGF.EmitBlock(AllocBB);
9726     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9727         MemberMapType,
9728         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9729                                      MappableExprsHandler::OMP_MAP_FROM)));
9730     MapperCGF.Builder.CreateBr(EndBB);
9731     MapperCGF.EmitBlock(AllocElseBB);
9732     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9733         LeftToFrom,
9734         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9735     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9736     // In case of to, clear OMP_MAP_FROM.
9737     MapperCGF.EmitBlock(ToBB);
9738     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9739         MemberMapType,
9740         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9741     MapperCGF.Builder.CreateBr(EndBB);
9742     MapperCGF.EmitBlock(ToElseBB);
9743     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9744         LeftToFrom,
9745         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9746     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9747     // In case of from, clear OMP_MAP_TO.
9748     MapperCGF.EmitBlock(FromBB);
9749     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9750         MemberMapType,
9751         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9752     // In case of tofrom, do nothing.
9753     MapperCGF.EmitBlock(EndBB);
9754     LastBB = EndBB;
9755     llvm::PHINode *CurMapType =
9756         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9757     CurMapType->addIncoming(AllocMapType, AllocBB);
9758     CurMapType->addIncoming(ToMapType, ToBB);
9759     CurMapType->addIncoming(FromMapType, FromBB);
9760     CurMapType->addIncoming(MemberMapType, ToElseBB);
9761 
9762     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9763                                      CurSizeArg, CurMapType, CurNameArg};
9764     if (Info.Mappers[I]) {
9765       // Call the corresponding mapper function.
9766       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9767           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9768       assert(MapperFunc && "Expect a valid mapper function is available.");
9769       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9770     } else {
9771       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9772       // data structure.
9773       MapperCGF.EmitRuntimeCall(
9774           OMPBuilder.getOrCreateRuntimeFunction(
9775               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9776           OffloadingArgs);
9777     }
9778   }
9779 
9780   // Update the pointer to point to the next element that needs to be mapped,
9781   // and check whether we have mapped all elements.
9782   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9783       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9784   PtrPHI->addIncoming(PtrNext, LastBB);
9785   llvm::Value *IsDone =
9786       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9787   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9788   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9789 
9790   MapperCGF.EmitBlock(ExitBB);
9791   // Emit array deletion if this is an array section and \p MapType indicates
9792   // that deletion is required.
9793   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9794                              ElementSize, DoneBB, /*IsInit=*/false);
9795 
9796   // Emit the function exit block.
9797   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9798   MapperCGF.FinishFunction();
9799   UDMMap.try_emplace(D, Fn);
9800   if (CGF) {
9801     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9802     Decls.second.push_back(D);
9803   }
9804 }
9805 
9806 /// Emit the array initialization or deletion portion for user-defined mapper
9807 /// code generation. First, it evaluates whether an array section is mapped and
9808 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9809 /// true, and \a MapType indicates to not delete this array, array
9810 /// initialization code is generated. If \a IsInit is false, and \a MapType
9811 /// indicates to not this array, array deletion code is generated.
9812 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9813     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9814     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9815     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9816   StringRef Prefix = IsInit ? ".init" : ".del";
9817 
9818   // Evaluate if this is an array section.
9819   llvm::BasicBlock *IsDeleteBB =
9820       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9821   llvm::BasicBlock *BodyBB =
9822       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9823   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9824       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9825   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9826 
9827   // Evaluate if we are going to delete this section.
9828   MapperCGF.EmitBlock(IsDeleteBB);
9829   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9830       MapType,
9831       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9832   llvm::Value *DeleteCond;
9833   if (IsInit) {
9834     DeleteCond = MapperCGF.Builder.CreateIsNull(
9835         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9836   } else {
9837     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9838         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9839   }
9840   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9841 
9842   MapperCGF.EmitBlock(BodyBB);
9843   // Get the array size by multiplying element size and element number (i.e., \p
9844   // Size).
9845   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9846       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9847   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9848   // memory allocation/deletion purpose only.
9849   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9850       MapType,
9851       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9852                                    MappableExprsHandler::OMP_MAP_FROM)));
9853   llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9854 
9855   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9856   // data structure.
9857   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9858                                    ArraySize, MapTypeArg, MapNameArg};
9859   MapperCGF.EmitRuntimeCall(
9860       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9861                                             OMPRTL___tgt_push_mapper_component),
9862       OffloadingArgs);
9863 }
9864 
9865 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9866     const OMPDeclareMapperDecl *D) {
9867   auto I = UDMMap.find(D);
9868   if (I != UDMMap.end())
9869     return I->second;
9870   emitUserDefinedMapper(D);
9871   return UDMMap.lookup(D);
9872 }
9873 
9874 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9875     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9876     llvm::Value *DeviceID,
9877     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9878                                      const OMPLoopDirective &D)>
9879         SizeEmitter) {
9880   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9881   const OMPExecutableDirective *TD = &D;
9882   // Get nested teams distribute kind directive, if any.
9883   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9884     TD = getNestedDistributeDirective(CGM.getContext(), D);
9885   if (!TD)
9886     return;
9887   const auto *LD = cast<OMPLoopDirective>(TD);
9888   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9889                                                          PrePostActionTy &) {
9890     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9891       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9892       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9893       CGF.EmitRuntimeCall(
9894           OMPBuilder.getOrCreateRuntimeFunction(
9895               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9896           Args);
9897     }
9898   };
9899   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9900 }
9901 
/// Emits the host-side code that launches the target region of directive
/// \p D.
///
/// When \p OutlinedFnID is non-null, the emitted code builds the offloading
/// argument arrays and calls one of the __tgt_target*_mapper runtime entry
/// points; if that call returns a non-zero value, the host version
/// \p OutlinedFn is called instead. When \p OutlinedFnID is null, only the
/// host call to \p OutlinedFn is emitted.
///
/// \param CGF          Function in which the code is emitted.
/// \param D            The target directive being lowered.
/// \param OutlinedFn   Host version of the target region; must be non-null.
/// \param OutlinedFnID Value identifying the target region to the runtime, or
///                     null if offloading is not supported.
/// \param IfCond       Expression of the 'if' clause, or null. It is only
///                     consulted when \p OutlinedFnID is non-null.
/// \param Device       Expression of the 'device' clause (may be null)
///                     together with its modifier.
/// \param SizeEmitter  Callback used to emit the loop trip count passed to
///                     the runtime (see emitTargetNumIterationsCall).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' or 'nowait' clause makes the launch go through
  // EmitOMPTargetTaskBasedDirective (see TargetThenGen/TargetElseGen below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the values of the captured variables up front; they are used
  // both as host call arguments and as the source of the map information.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // State shared by reference between the lambdas below: TargetThenGen fills
  // it in, ThenGen consumes it.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      // NOTE(review): the captures are regenerated here presumably because,
      // with an outer task, this code is emitted in a different context than
      // the one ArgsCodegen ran in - confirm.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      // The device id is passed to the runtime as a signed 64-bit integer.
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: pass the "undefined device" sentinel.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    //
    // The calls emitted below are the *_mapper variants of these entry
    // points, with a *_nowait_mapper form selected when a 'nowait' clause is
    // present.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams information: same argument list without the trailing
      // NumTeams/NumThreads values.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value from the runtime call is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    // Fallback path: call the host version of the region.
    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collects the map information for every capture of the target region,
  // emits the offloading arrays, publishes them through InputInfo /
  // MapTypesArray / MapNamesArray and finally emits ThenGen, wrapped in a task
  // when RequiresOuterTask is set.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the generated
    // captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled ('this' is recorded as a null
        // declaration) so generateAllInfo below can tell it apart from list
        // items that were not captured.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                                    nullptr, /*NoTargetParam=*/false);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays to ThenGen through the shared state.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only counterpart of TargetThenGen: emits ElseGen, wrapped in a task
  // when RequiresOuterTask is set. No map information is needed, so a
  // default-constructed OMPTargetDataInfo is passed along.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10208 
10209 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10210                                                     StringRef ParentName) {
10211   if (!S)
10212     return;
10213 
10214   // Codegen OMP target directives that offload compute to the device.
10215   bool RequiresDeviceCodegen =
10216       isa<OMPExecutableDirective>(S) &&
10217       isOpenMPTargetExecutionDirective(
10218           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10219 
10220   if (RequiresDeviceCodegen) {
10221     const auto &E = *cast<OMPExecutableDirective>(S);
10222     unsigned DeviceID;
10223     unsigned FileID;
10224     unsigned Line;
10225     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10226                              FileID, Line);
10227 
10228     // Is this a target region that should not be emitted as an entry point? If
10229     // so just signal we are done with this target region.
10230     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10231                                                             ParentName, Line))
10232       return;
10233 
10234     switch (E.getDirectiveKind()) {
10235     case OMPD_target:
10236       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10237                                                    cast<OMPTargetDirective>(E));
10238       break;
10239     case OMPD_target_parallel:
10240       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10241           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10242       break;
10243     case OMPD_target_teams:
10244       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10245           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10246       break;
10247     case OMPD_target_teams_distribute:
10248       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10249           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10250       break;
10251     case OMPD_target_teams_distribute_simd:
10252       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10253           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10254       break;
10255     case OMPD_target_parallel_for:
10256       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10257           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10258       break;
10259     case OMPD_target_parallel_for_simd:
10260       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10261           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10262       break;
10263     case OMPD_target_simd:
10264       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10265           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10266       break;
10267     case OMPD_target_teams_distribute_parallel_for:
10268       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10269           CGM, ParentName,
10270           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10271       break;
10272     case OMPD_target_teams_distribute_parallel_for_simd:
10273       CodeGenFunction::
10274           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10275               CGM, ParentName,
10276               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10277       break;
10278     case OMPD_parallel:
10279     case OMPD_for:
10280     case OMPD_parallel_for:
10281     case OMPD_parallel_master:
10282     case OMPD_parallel_sections:
10283     case OMPD_for_simd:
10284     case OMPD_parallel_for_simd:
10285     case OMPD_cancel:
10286     case OMPD_cancellation_point:
10287     case OMPD_ordered:
10288     case OMPD_threadprivate:
10289     case OMPD_allocate:
10290     case OMPD_task:
10291     case OMPD_simd:
10292     case OMPD_sections:
10293     case OMPD_section:
10294     case OMPD_single:
10295     case OMPD_master:
10296     case OMPD_critical:
10297     case OMPD_taskyield:
10298     case OMPD_barrier:
10299     case OMPD_taskwait:
10300     case OMPD_taskgroup:
10301     case OMPD_atomic:
10302     case OMPD_flush:
10303     case OMPD_depobj:
10304     case OMPD_scan:
10305     case OMPD_teams:
10306     case OMPD_target_data:
10307     case OMPD_target_exit_data:
10308     case OMPD_target_enter_data:
10309     case OMPD_distribute:
10310     case OMPD_distribute_simd:
10311     case OMPD_distribute_parallel_for:
10312     case OMPD_distribute_parallel_for_simd:
10313     case OMPD_teams_distribute:
10314     case OMPD_teams_distribute_simd:
10315     case OMPD_teams_distribute_parallel_for:
10316     case OMPD_teams_distribute_parallel_for_simd:
10317     case OMPD_target_update:
10318     case OMPD_declare_simd:
10319     case OMPD_declare_variant:
10320     case OMPD_begin_declare_variant:
10321     case OMPD_end_declare_variant:
10322     case OMPD_declare_target:
10323     case OMPD_end_declare_target:
10324     case OMPD_declare_reduction:
10325     case OMPD_declare_mapper:
10326     case OMPD_taskloop:
10327     case OMPD_taskloop_simd:
10328     case OMPD_master_taskloop:
10329     case OMPD_master_taskloop_simd:
10330     case OMPD_parallel_master_taskloop:
10331     case OMPD_parallel_master_taskloop_simd:
10332     case OMPD_requires:
10333     case OMPD_unknown:
10334     default:
10335       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10336     }
10337     return;
10338   }
10339 
10340   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10341     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10342       return;
10343 
10344     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10345     return;
10346   }
10347 
10348   // If this is a lambda function, look into its body.
10349   if (const auto *L = dyn_cast<LambdaExpr>(S))
10350     S = L->getBody();
10351 
10352   // Keep looking for target regions recursively.
10353   for (const Stmt *II : S->children())
10354     scanForTargetRegionsFunctions(II, ParentName);
10355 }
10356 
10357 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10358   // If emitting code for the host, we do not process FD here. Instead we do
10359   // the normal code generation.
10360   if (!CGM.getLangOpts().OpenMPIsDevice) {
10361     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10362       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10363           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10364       // Do not emit device_type(nohost) functions for the host.
10365       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10366         return true;
10367     }
10368     return false;
10369   }
10370 
10371   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10372   // Try to detect target regions in the function.
10373   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10374     StringRef Name = CGM.getMangledName(GD);
10375     scanForTargetRegionsFunctions(FD->getBody(), Name);
10376     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10377         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10378     // Do not emit device_type(nohost) functions for the host.
10379     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10380       return true;
10381   }
10382 
10383   // Do not to emit function if it is not marked as declare target.
10384   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10385          AlreadyEmittedTargetDecls.count(VD) == 0;
10386 }
10387 
10388 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10389   if (!CGM.getLangOpts().OpenMPIsDevice)
10390     return false;
10391 
10392   // Check if there are Ctors/Dtors in this declaration and look for target
10393   // regions in it. We use the complete variant to produce the kernel name
10394   // mangling.
10395   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10396   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10397     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10398       StringRef ParentName =
10399           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10400       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10401     }
10402     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10403       StringRef ParentName =
10404           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10405       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10406     }
10407   }
10408 
10409   // Do not to emit variable if it is not marked as declare target.
10410   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10411       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10412           cast<VarDecl>(GD.getDecl()));
10413   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10414       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10415        HasRequiresUnifiedSharedMemory)) {
10416     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10417     return true;
10418   }
10419   return false;
10420 }
10421 
10422 llvm::Constant *
10423 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10424                                                 const VarDecl *VD) {
10425   assert(VD->getType().isConstant(CGM.getContext()) &&
10426          "Expected constant variable.");
10427   StringRef VarName;
10428   llvm::Constant *Addr;
10429   llvm::GlobalValue::LinkageTypes Linkage;
10430   QualType Ty = VD->getType();
10431   SmallString<128> Buffer;
10432   {
10433     unsigned DeviceID;
10434     unsigned FileID;
10435     unsigned Line;
10436     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10437                              FileID, Line);
10438     llvm::raw_svector_ostream OS(Buffer);
10439     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10440        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10441     VarName = OS.str();
10442   }
10443   Linkage = llvm::GlobalValue::InternalLinkage;
10444   Addr =
10445       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10446                                   getDefaultFirstprivateAddressSpace());
10447   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10448   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10449   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10450   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10451       VarName, Addr, VarSize,
10452       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10453   return Addr;
10454 }
10455 
10456 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10457                                                    llvm::Constant *Addr) {
10458   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10459       !CGM.getLangOpts().OpenMPIsDevice)
10460     return;
10461   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10462       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10463   if (!Res) {
10464     if (CGM.getLangOpts().OpenMPIsDevice) {
10465       // Register non-target variables being emitted in device code (debug info
10466       // may cause this).
10467       StringRef VarName = CGM.getMangledName(VD);
10468       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10469     }
10470     return;
10471   }
10472   // Register declare target variables.
10473   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10474   StringRef VarName;
10475   CharUnits VarSize;
10476   llvm::GlobalValue::LinkageTypes Linkage;
10477 
10478   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10479       !HasRequiresUnifiedSharedMemory) {
10480     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10481     VarName = CGM.getMangledName(VD);
10482     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10483       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10484       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10485     } else {
10486       VarSize = CharUnits::Zero();
10487     }
10488     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10489     // Temp solution to prevent optimizations of the internal variables.
10490     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10491       std::string RefName = getName({VarName, "ref"});
10492       if (!CGM.GetGlobalValue(RefName)) {
10493         llvm::Constant *AddrRef =
10494             getOrCreateInternalVariable(Addr->getType(), RefName);
10495         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10496         GVAddrRef->setConstant(/*Val=*/true);
10497         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10498         GVAddrRef->setInitializer(Addr);
10499         CGM.addCompilerUsedGlobal(GVAddrRef);
10500       }
10501     }
10502   } else {
10503     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10504             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10505              HasRequiresUnifiedSharedMemory)) &&
10506            "Declare target attribute must link or to with unified memory.");
10507     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10508       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10509     else
10510       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10511 
10512     if (CGM.getLangOpts().OpenMPIsDevice) {
10513       VarName = Addr->getName();
10514       Addr = nullptr;
10515     } else {
10516       VarName = getAddrOfDeclareTargetVar(VD).getName();
10517       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10518     }
10519     VarSize = CGM.getPointerSize();
10520     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10521   }
10522 
10523   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10524       VarName, Addr, VarSize, Flags, Linkage);
10525 }
10526 
10527 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10528   if (isa<FunctionDecl>(GD.getDecl()) ||
10529       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10530     return emitTargetFunctions(GD);
10531 
10532   return emitTargetGlobalVariable(GD);
10533 }
10534 
10535 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10536   for (const VarDecl *VD : DeferredGlobalVariables) {
10537     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10538         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10539     if (!Res)
10540       continue;
10541     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10542         !HasRequiresUnifiedSharedMemory) {
10543       CGM.EmitGlobal(VD);
10544     } else {
10545       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10546               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10547                HasRequiresUnifiedSharedMemory)) &&
10548              "Expected link clause or to clause with unified memory.");
10549       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10550     }
10551   }
10552 }
10553 
/// Hook for adjusting lambda-related data of a target directive. This
/// implementation performs no adjustment: it only asserts that \p D is a
/// target execution directive. \p CGF is not used here.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10559 
10560 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10561   for (const OMPClause *Clause : D->clauselists()) {
10562     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10563       HasRequiresUnifiedSharedMemory = true;
10564     } else if (const auto *AC =
10565                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10566       switch (AC->getAtomicDefaultMemOrderKind()) {
10567       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10568         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10569         break;
10570       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10571         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10572         break;
10573       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10574         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10575         break;
10576       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10577         break;
10578       }
10579     }
10580   }
10581 }
10582 
/// Returns the current value of RequiresAtomicOrdering, i.e. the ordering last
/// recorded by processRequiresDirective for an atomic_default_mem_order
/// clause (or whatever value the member held before any such clause).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10586 
10587 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10588                                                        LangAS &AS) {
10589   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10590     return false;
10591   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10592   switch(A->getAllocatorType()) {
10593   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10594   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10595   // Not supported, fallback to the default mem space.
10596   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10597   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10598   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10599   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10600   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10601   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10602   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10603     AS = LangAS::Default;
10604     return true;
10605   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10606     llvm_unreachable("Expected predefined allocator for the variables with the "
10607                      "static storage.");
10608   }
10609   return false;
10610 }
10611 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it sees a unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10615 
10616 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10617     CodeGenModule &CGM)
10618     : CGM(CGM) {
10619   if (CGM.getLangOpts().OpenMPIsDevice) {
10620     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10621     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10622   }
10623 }
10624 
10625 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10626   if (CGM.getLangOpts().OpenMPIsDevice)
10627     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10628 }
10629 
10630 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10631   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10632     return true;
10633 
10634   const auto *D = cast<FunctionDecl>(GD.getDecl());
10635   // Do not to emit function if it is marked as declare target as it was already
10636   // emitted.
10637   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10638     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10639       if (auto *F = dyn_cast_or_null<llvm::Function>(
10640               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10641         return !F->isDeclaration();
10642       return false;
10643     }
10644     return true;
10645   }
10646 
10647   return !AlreadyEmittedTargetDecls.insert(D).second;
10648 }
10649 
10650 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10651   // If we don't have entries or if we are emitting code for the device, we
10652   // don't need to do anything.
10653   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10654       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10655       (OffloadEntriesInfoManager.empty() &&
10656        !HasEmittedDeclareTargetRegion &&
10657        !HasEmittedTargetRegion))
10658     return nullptr;
10659 
10660   // Create and register the function that handles the requires directives.
10661   ASTContext &C = CGM.getContext();
10662 
10663   llvm::Function *RequiresRegFn;
10664   {
10665     CodeGenFunction CGF(CGM);
10666     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10667     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10668     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10669     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10670     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10671     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10672     // TODO: check for other requires clauses.
10673     // The requires directive takes effect only when a target region is
10674     // present in the compilation unit. Otherwise it is ignored and not
10675     // passed to the runtime. This avoids the runtime from throwing an error
10676     // for mismatching requires clauses across compilation units that don't
10677     // contain at least 1 target region.
10678     assert((HasEmittedTargetRegion ||
10679             HasEmittedDeclareTargetRegion ||
10680             !OffloadEntriesInfoManager.empty()) &&
10681            "Target or declare target region expected.");
10682     if (HasRequiresUnifiedSharedMemory)
10683       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10684     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10685                             CGM.getModule(), OMPRTL___tgt_register_requires),
10686                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10687     CGF.FinishFunction();
10688   }
10689   return RequiresRegFn;
10690 }
10691 
10692 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10693                                     const OMPExecutableDirective &D,
10694                                     SourceLocation Loc,
10695                                     llvm::Function *OutlinedFn,
10696                                     ArrayRef<llvm::Value *> CapturedVars) {
10697   if (!CGF.HaveInsertPoint())
10698     return;
10699 
10700   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10701   CodeGenFunction::RunCleanupsScope Scope(CGF);
10702 
10703   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10704   llvm::Value *Args[] = {
10705       RTLoc,
10706       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10707       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10708   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10709   RealArgs.append(std::begin(Args), std::end(Args));
10710   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10711 
10712   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10713       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10714   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10715 }
10716 
10717 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10718                                          const Expr *NumTeams,
10719                                          const Expr *ThreadLimit,
10720                                          SourceLocation Loc) {
10721   if (!CGF.HaveInsertPoint())
10722     return;
10723 
10724   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10725 
10726   llvm::Value *NumTeamsVal =
10727       NumTeams
10728           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10729                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10730           : CGF.Builder.getInt32(0);
10731 
10732   llvm::Value *ThreadLimitVal =
10733       ThreadLimit
10734           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10735                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10736           : CGF.Builder.getInt32(0);
10737 
10738   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10739   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10740                                      ThreadLimitVal};
10741   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10742                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10743                       PushNumTeamsArgs);
10744 }
10745 
/// Emits a structured target data region: a call to
/// __tgt_target_data_begin_mapper, the region body (\p CodeGen), and a call to
/// __tgt_target_data_end_mapper.
///
/// \param CGF Function being emitted; nothing is emitted if it has no insert
/// point.
/// \param D Directive whose map information is collected and whose begin
/// location is passed to the runtime calls.
/// \param IfCond If non-null, both runtime calls are emitted through
/// emitIfClause on this condition.
/// \param Device If non-null, evaluated and sign-extended/truncated to a
/// 64-bit device id; otherwise OMP_DEVICEID_UNDEF is passed.
/// \param CodeGen Generator for the region body.
/// \param Info Receives the offloading arrays built for the begin call and is
/// read again for the end call.
///
/// If Info.CaptureDeviceAddrMap is non-empty after the begin code runs, the
/// body is emitted inside the begin "then" branch and once more, with
/// NoPrivAction installed, in the "else" branch. Otherwise the body is emitted
/// exactly once between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any. The device expression is evaluated again here,
    // independently of the evaluation done for the begin call.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment under the same condition used to open it.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10899 
/// Emits the runtime call for a standalone target data directive
/// ('target enter data', 'target exit data' or 'target update').
///
/// \param CGF Function being emitted; nothing is emitted if it has no insert
/// point.
/// \param D The standalone directive; any other directive kind is rejected by
/// the assertion below and by the unreachable default in the switch.
/// \param IfCond If non-null, the whole sequence is emitted through
/// emitIfClause with an empty else branch.
/// \param Device If non-null, evaluated and cast to a signed 64-bit device
/// id; otherwise OMP_DEVICEID_UNDEF is passed.
///
/// The *_nowait_mapper runtime entry is selected when the directive has a
/// nowait clause. When the directive has a depend or nowait clause the call is
/// emitted through EmitOMPTargetTaskBasedDirective; otherwise it is emitted
/// inline via emitInlinedDirective.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills these in,
  // ThenGen reads them when building the runtime call arguments.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every remaining directive kind is invalid here and shares the
    // unreachable default below.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo /
  // MapTypesArray / MapNamesArray, and then runs ThenGen either inside a
  // target task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if clause, nothing at all is emitted on the false branch.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11076 
11077 namespace {
11078   /// Kind of parameter in a function with 'declare simd' directive.
11079   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11080   /// Attribute set of the parameter.
11081   struct ParamAttrTy {
11082     ParamKindTy Kind = Vector;
11083     llvm::APSInt StrideOrArg;
11084     llvm::APSInt Alignment;
11085   };
11086 } // namespace
11087 
11088 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11089                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11090   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11091   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11092   // of that clause. The VLEN value must be power of 2.
11093   // In other case the notion of the function`s "characteristic data type" (CDT)
11094   // is used to compute the vector length.
11095   // CDT is defined in the following order:
11096   //   a) For non-void function, the CDT is the return type.
11097   //   b) If the function has any non-uniform, non-linear parameters, then the
11098   //   CDT is the type of the first such parameter.
11099   //   c) If the CDT determined by a) or b) above is struct, union, or class
11100   //   type which is pass-by-value (except for the type that maps to the
11101   //   built-in complex data type), the characteristic data type is int.
11102   //   d) If none of the above three cases is applicable, the CDT is int.
11103   // The VLEN is then determined based on the CDT and the size of vector
11104   // register of that ISA for which current vector version is generated. The
11105   // VLEN is computed using the formula below:
11106   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11107   // where vector register size specified in section 3.2.1 Registers and the
11108   // Stack Frame of original AMD64 ABI document.
11109   QualType RetType = FD->getReturnType();
11110   if (RetType.isNull())
11111     return 0;
11112   ASTContext &C = FD->getASTContext();
11113   QualType CDT;
11114   if (!RetType.isNull() && !RetType->isVoidType()) {
11115     CDT = RetType;
11116   } else {
11117     unsigned Offset = 0;
11118     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11119       if (ParamAttrs[Offset].Kind == Vector)
11120         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11121       ++Offset;
11122     }
11123     if (CDT.isNull()) {
11124       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11125         if (ParamAttrs[I + Offset].Kind == Vector) {
11126           CDT = FD->getParamDecl(I)->getType();
11127           break;
11128         }
11129       }
11130     }
11131   }
11132   if (CDT.isNull())
11133     CDT = C.IntTy;
11134   CDT = CDT->getCanonicalTypeUnqualified();
11135   if (CDT->isRecordType() || CDT->isUnionType())
11136     CDT = C.IntTy;
11137   return C.getTypeSize(CDT);
11138 }
11139 
11140 static void
11141 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11142                            const llvm::APSInt &VLENVal,
11143                            ArrayRef<ParamAttrTy> ParamAttrs,
11144                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11145   struct ISADataTy {
11146     char ISA;
11147     unsigned VecRegSize;
11148   };
11149   ISADataTy ISAData[] = {
11150       {
11151           'b', 128
11152       }, // SSE
11153       {
11154           'c', 256
11155       }, // AVX
11156       {
11157           'd', 256
11158       }, // AVX2
11159       {
11160           'e', 512
11161       }, // AVX512
11162   };
11163   llvm::SmallVector<char, 2> Masked;
11164   switch (State) {
11165   case OMPDeclareSimdDeclAttr::BS_Undefined:
11166     Masked.push_back('N');
11167     Masked.push_back('M');
11168     break;
11169   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11170     Masked.push_back('N');
11171     break;
11172   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11173     Masked.push_back('M');
11174     break;
11175   }
11176   for (char Mask : Masked) {
11177     for (const ISADataTy &Data : ISAData) {
11178       SmallString<256> Buffer;
11179       llvm::raw_svector_ostream Out(Buffer);
11180       Out << "_ZGV" << Data.ISA << Mask;
11181       if (!VLENVal) {
11182         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11183         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11184         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11185       } else {
11186         Out << VLENVal;
11187       }
11188       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11189         switch (ParamAttr.Kind){
11190         case LinearWithVarStride:
11191           Out << 's' << ParamAttr.StrideOrArg;
11192           break;
11193         case Linear:
11194           Out << 'l';
11195           if (ParamAttr.StrideOrArg != 1)
11196             Out << ParamAttr.StrideOrArg;
11197           break;
11198         case Uniform:
11199           Out << 'u';
11200           break;
11201         case Vector:
11202           Out << 'v';
11203           break;
11204         }
11205         if (!!ParamAttr.Alignment)
11206           Out << 'a' << ParamAttr.Alignment;
11207       }
11208       Out << '_' << Fn->getName();
11209       Fn->addFnAttr(Out.str());
11210     }
11211   }
11212 }
11213 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11219 
11220 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11221 ///
11222 /// TODO: Need to implement the behavior for reference marked with a
11223 /// var or no linear modifiers (1.b in the section). For this, we
11224 /// need to extend ParamKindTy to support the linear modifiers.
11225 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11226   QT = QT.getCanonicalType();
11227 
11228   if (QT->isVoidType())
11229     return false;
11230 
11231   if (Kind == ParamKindTy::Uniform)
11232     return false;
11233 
11234   if (Kind == ParamKindTy::Linear)
11235     return false;
11236 
11237   // TODO: Handle linear references with modifiers
11238 
11239   if (Kind == ParamKindTy::LinearWithVarStride)
11240     return false;
11241 
11242   return true;
11243 }
11244 
11245 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11246 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11247   QT = QT.getCanonicalType();
11248   unsigned Size = C.getTypeSize(QT);
11249 
11250   // Only scalars and complex within 16 bytes wide set PVB to true.
11251   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11252     return false;
11253 
11254   if (QT->isFloatingType())
11255     return true;
11256 
11257   if (QT->isIntegerType())
11258     return true;
11259 
11260   if (QT->isPointerType())
11261     return true;
11262 
11263   // TODO: Add support for complex types (section 3.1.2, item 2).
11264 
11265   return false;
11266 }
11267 
11268 /// Computes the lane size (LS) of a return type or of an input parameter,
11269 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11270 /// TODO: Add support for references, section 3.2.1, item 1.
11271 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11272   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11273     QualType PTy = QT.getCanonicalType()->getPointeeType();
11274     if (getAArch64PBV(PTy, C))
11275       return C.getTypeSize(PTy);
11276   }
11277   if (getAArch64PBV(QT, C))
11278     return C.getTypeSize(QT);
11279 
11280   return C.getTypeSize(C.getUIntPtrType());
11281 }
11282 
11283 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11284 // signature of the scalar function, as defined in 3.2.2 of the
11285 // AAVFABI.
11286 static std::tuple<unsigned, unsigned, bool>
11287 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11288   QualType RetType = FD->getReturnType().getCanonicalType();
11289 
11290   ASTContext &C = FD->getASTContext();
11291 
11292   bool OutputBecomesInput = false;
11293 
11294   llvm::SmallVector<unsigned, 8> Sizes;
11295   if (!RetType->isVoidType()) {
11296     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11297     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11298       OutputBecomesInput = true;
11299   }
11300   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11301     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11302     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11303   }
11304 
11305   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11306   // The LS of a function parameter / return value can only be a power
11307   // of 2, starting from 8 bits, up to 128.
11308   assert(std::all_of(Sizes.begin(), Sizes.end(),
11309                      [](unsigned Size) {
11310                        return Size == 8 || Size == 16 || Size == 32 ||
11311                               Size == 64 || Size == 128;
11312                      }) &&
11313          "Invalid size");
11314 
11315   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11316                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11317                          OutputBecomesInput);
11318 }
11319 
11320 /// Mangle the parameter part of the vector function name according to
11321 /// their OpenMP classification. The mangling function is defined in
11322 /// section 3.5 of the AAVFABI.
11323 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11324   SmallString<256> Buffer;
11325   llvm::raw_svector_ostream Out(Buffer);
11326   for (const auto &ParamAttr : ParamAttrs) {
11327     switch (ParamAttr.Kind) {
11328     case LinearWithVarStride:
11329       Out << "ls" << ParamAttr.StrideOrArg;
11330       break;
11331     case Linear:
11332       Out << 'l';
11333       // Don't print the step value if it is not present or if it is
11334       // equal to 1.
11335       if (ParamAttr.StrideOrArg != 1)
11336         Out << ParamAttr.StrideOrArg;
11337       break;
11338     case Uniform:
11339       Out << 'u';
11340       break;
11341     case Vector:
11342       Out << 'v';
11343       break;
11344     }
11345 
11346     if (!!ParamAttr.Alignment)
11347       Out << 'a' << ParamAttr.Alignment;
11348   }
11349 
11350   return std::string(Out.str());
11351 }
11352 
11353 // Function used to add the attribute. The parameter `VLEN` is
11354 // templated to allow the use of "x" when targeting scalable functions
11355 // for SVE.
11356 template <typename T>
11357 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11358                                  char ISA, StringRef ParSeq,
11359                                  StringRef MangledName, bool OutputBecomesInput,
11360                                  llvm::Function *Fn) {
11361   SmallString<256> Buffer;
11362   llvm::raw_svector_ostream Out(Buffer);
11363   Out << Prefix << ISA << LMask << VLEN;
11364   if (OutputBecomesInput)
11365     Out << "v";
11366   Out << ParSeq << "_" << MangledName;
11367   Fn->addFnAttr(Out.str());
11368 }
11369 
11370 // Helper function to generate the Advanced SIMD names depending on
11371 // the value of the NDS when simdlen is not present.
11372 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11373                                       StringRef Prefix, char ISA,
11374                                       StringRef ParSeq, StringRef MangledName,
11375                                       bool OutputBecomesInput,
11376                                       llvm::Function *Fn) {
11377   switch (NDS) {
11378   case 8:
11379     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11380                          OutputBecomesInput, Fn);
11381     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11382                          OutputBecomesInput, Fn);
11383     break;
11384   case 16:
11385     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11386                          OutputBecomesInput, Fn);
11387     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11388                          OutputBecomesInput, Fn);
11389     break;
11390   case 32:
11391     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11392                          OutputBecomesInput, Fn);
11393     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11394                          OutputBecomesInput, Fn);
11395     break;
11396   case 64:
11397   case 128:
11398     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11399                          OutputBecomesInput, Fn);
11400     break;
11401   default:
11402     llvm_unreachable("Scalar type is too wide.");
11403   }
11404 }
11405 
11406 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11407 static void emitAArch64DeclareSimdFunction(
11408     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11409     ArrayRef<ParamAttrTy> ParamAttrs,
11410     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11411     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11412 
11413   // Get basic data for building the vector signature.
11414   const auto Data = getNDSWDS(FD, ParamAttrs);
11415   const unsigned NDS = std::get<0>(Data);
11416   const unsigned WDS = std::get<1>(Data);
11417   const bool OutputBecomesInput = std::get<2>(Data);
11418 
11419   // Check the values provided via `simdlen` by the user.
11420   // 1. A `simdlen(1)` doesn't produce vector signatures,
11421   if (UserVLEN == 1) {
11422     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11423         DiagnosticsEngine::Warning,
11424         "The clause simdlen(1) has no effect when targeting aarch64.");
11425     CGM.getDiags().Report(SLoc, DiagID);
11426     return;
11427   }
11428 
11429   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11430   // Advanced SIMD output.
11431   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11432     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11433         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11434                                     "power of 2 when targeting Advanced SIMD.");
11435     CGM.getDiags().Report(SLoc, DiagID);
11436     return;
11437   }
11438 
11439   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11440   // limits.
11441   if (ISA == 's' && UserVLEN != 0) {
11442     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11443       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11444           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11445                                       "lanes in the architectural constraints "
11446                                       "for SVE (min is 128-bit, max is "
11447                                       "2048-bit, by steps of 128-bit)");
11448       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11449       return;
11450     }
11451   }
11452 
11453   // Sort out parameter sequence.
11454   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11455   StringRef Prefix = "_ZGV";
11456   // Generate simdlen from user input (if any).
11457   if (UserVLEN) {
11458     if (ISA == 's') {
11459       // SVE generates only a masked function.
11460       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11461                            OutputBecomesInput, Fn);
11462     } else {
11463       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11464       // Advanced SIMD generates one or two functions, depending on
11465       // the `[not]inbranch` clause.
11466       switch (State) {
11467       case OMPDeclareSimdDeclAttr::BS_Undefined:
11468         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11469                              OutputBecomesInput, Fn);
11470         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11471                              OutputBecomesInput, Fn);
11472         break;
11473       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11474         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11475                              OutputBecomesInput, Fn);
11476         break;
11477       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11478         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11479                              OutputBecomesInput, Fn);
11480         break;
11481       }
11482     }
11483   } else {
11484     // If no user simdlen is provided, follow the AAVFABI rules for
11485     // generating the vector length.
11486     if (ISA == 's') {
11487       // SVE, section 3.4.1, item 1.
11488       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11489                            OutputBecomesInput, Fn);
11490     } else {
11491       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11492       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11493       // two vector names depending on the use of the clause
11494       // `[not]inbranch`.
11495       switch (State) {
11496       case OMPDeclareSimdDeclAttr::BS_Undefined:
11497         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11498                                   OutputBecomesInput, Fn);
11499         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11500                                   OutputBecomesInput, Fn);
11501         break;
11502       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11503         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11504                                   OutputBecomesInput, Fn);
11505         break;
11506       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11507         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11508                                   OutputBecomesInput, Fn);
11509         break;
11510       }
11511     }
11512   }
11513 }
11514 
11515 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11516                                               llvm::Function *Fn) {
11517   ASTContext &C = CGM.getContext();
11518   FD = FD->getMostRecentDecl();
11519   // Map params to their positions in function decl.
11520   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11521   if (isa<CXXMethodDecl>(FD))
11522     ParamPositions.try_emplace(FD, 0);
11523   unsigned ParamPos = ParamPositions.size();
11524   for (const ParmVarDecl *P : FD->parameters()) {
11525     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11526     ++ParamPos;
11527   }
11528   while (FD) {
11529     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11530       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11531       // Mark uniform parameters.
11532       for (const Expr *E : Attr->uniforms()) {
11533         E = E->IgnoreParenImpCasts();
11534         unsigned Pos;
11535         if (isa<CXXThisExpr>(E)) {
11536           Pos = ParamPositions[FD];
11537         } else {
11538           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11539                                 ->getCanonicalDecl();
11540           Pos = ParamPositions[PVD];
11541         }
11542         ParamAttrs[Pos].Kind = Uniform;
11543       }
11544       // Get alignment info.
11545       auto NI = Attr->alignments_begin();
11546       for (const Expr *E : Attr->aligneds()) {
11547         E = E->IgnoreParenImpCasts();
11548         unsigned Pos;
11549         QualType ParmTy;
11550         if (isa<CXXThisExpr>(E)) {
11551           Pos = ParamPositions[FD];
11552           ParmTy = E->getType();
11553         } else {
11554           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11555                                 ->getCanonicalDecl();
11556           Pos = ParamPositions[PVD];
11557           ParmTy = PVD->getType();
11558         }
11559         ParamAttrs[Pos].Alignment =
11560             (*NI)
11561                 ? (*NI)->EvaluateKnownConstInt(C)
11562                 : llvm::APSInt::getUnsigned(
11563                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11564                           .getQuantity());
11565         ++NI;
11566       }
11567       // Mark linear parameters.
11568       auto SI = Attr->steps_begin();
11569       auto MI = Attr->modifiers_begin();
11570       for (const Expr *E : Attr->linears()) {
11571         E = E->IgnoreParenImpCasts();
11572         unsigned Pos;
11573         // Rescaling factor needed to compute the linear parameter
11574         // value in the mangled name.
11575         unsigned PtrRescalingFactor = 1;
11576         if (isa<CXXThisExpr>(E)) {
11577           Pos = ParamPositions[FD];
11578         } else {
11579           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11580                                 ->getCanonicalDecl();
11581           Pos = ParamPositions[PVD];
11582           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11583             PtrRescalingFactor = CGM.getContext()
11584                                      .getTypeSizeInChars(P->getPointeeType())
11585                                      .getQuantity();
11586         }
11587         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11588         ParamAttr.Kind = Linear;
11589         // Assuming a stride of 1, for `linear` without modifiers.
11590         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11591         if (*SI) {
11592           Expr::EvalResult Result;
11593           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11594             if (const auto *DRE =
11595                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11596               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11597                 ParamAttr.Kind = LinearWithVarStride;
11598                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11599                     ParamPositions[StridePVD->getCanonicalDecl()]);
11600               }
11601             }
11602           } else {
11603             ParamAttr.StrideOrArg = Result.Val.getInt();
11604           }
11605         }
11606         // If we are using a linear clause on a pointer, we need to
11607         // rescale the value of linear_step with the byte size of the
11608         // pointee type.
11609         if (Linear == ParamAttr.Kind)
11610           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11611         ++SI;
11612         ++MI;
11613       }
11614       llvm::APSInt VLENVal;
11615       SourceLocation ExprLoc;
11616       const Expr *VLENExpr = Attr->getSimdlen();
11617       if (VLENExpr) {
11618         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11619         ExprLoc = VLENExpr->getExprLoc();
11620       }
11621       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11622       if (CGM.getTriple().isX86()) {
11623         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11624       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11625         unsigned VLEN = VLENVal.getExtValue();
11626         StringRef MangledName = Fn->getName();
11627         if (CGM.getTarget().hasFeature("sve"))
11628           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11629                                          MangledName, 's', 128, Fn, ExprLoc);
11630         if (CGM.getTarget().hasFeature("neon"))
11631           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11632                                          MangledName, 'n', 128, Fn, ExprLoc);
11633       }
11634     }
11635     FD = FD->getPreviousDecl();
11636   }
11637 }
11638 
11639 namespace {
11640 /// Cleanup action for doacross support.
11641 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11642 public:
11643   static const int DoacrossFinArgs = 2;
11644 
11645 private:
11646   llvm::FunctionCallee RTLFn;
11647   llvm::Value *Args[DoacrossFinArgs];
11648 
11649 public:
11650   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11651                     ArrayRef<llvm::Value *> CallArgs)
11652       : RTLFn(RTLFn) {
11653     assert(CallArgs.size() == DoacrossFinArgs);
11654     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11655   }
11656   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11657     if (!CGF.HaveInsertPoint())
11658       return;
11659     CGF.EmitRuntimeCall(RTLFn, Args);
11660   }
11661 };
11662 } // namespace
11663 
11664 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11665                                        const OMPLoopDirective &D,
11666                                        ArrayRef<Expr *> NumIterations) {
11667   if (!CGF.HaveInsertPoint())
11668     return;
11669 
11670   ASTContext &C = CGM.getContext();
11671   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11672   RecordDecl *RD;
11673   if (KmpDimTy.isNull()) {
11674     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11675     //  kmp_int64 lo; // lower
11676     //  kmp_int64 up; // upper
11677     //  kmp_int64 st; // stride
11678     // };
11679     RD = C.buildImplicitRecord("kmp_dim");
11680     RD->startDefinition();
11681     addFieldToRecordDecl(C, RD, Int64Ty);
11682     addFieldToRecordDecl(C, RD, Int64Ty);
11683     addFieldToRecordDecl(C, RD, Int64Ty);
11684     RD->completeDefinition();
11685     KmpDimTy = C.getRecordType(RD);
11686   } else {
11687     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11688   }
11689   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11690   QualType ArrayTy =
11691       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11692 
11693   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11694   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11695   enum { LowerFD = 0, UpperFD, StrideFD };
11696   // Fill dims with data.
11697   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11698     LValue DimsLVal = CGF.MakeAddrLValue(
11699         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11700     // dims.upper = num_iterations;
11701     LValue UpperLVal = CGF.EmitLValueForField(
11702         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11703     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11704         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11705         Int64Ty, NumIterations[I]->getExprLoc());
11706     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11707     // dims.stride = 1;
11708     LValue StrideLVal = CGF.EmitLValueForField(
11709         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11710     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11711                           StrideLVal);
11712   }
11713 
11714   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11715   // kmp_int32 num_dims, struct kmp_dim * dims);
11716   llvm::Value *Args[] = {
11717       emitUpdateLocation(CGF, D.getBeginLoc()),
11718       getThreadID(CGF, D.getBeginLoc()),
11719       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11720       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11721           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11722           CGM.VoidPtrTy)};
11723 
11724   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11725       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11726   CGF.EmitRuntimeCall(RTLFn, Args);
11727   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11728       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11729   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11730       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11731   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11732                                              llvm::makeArrayRef(FiniArgs));
11733 }
11734 
11735 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11736                                           const OMPDependClause *C) {
11737   QualType Int64Ty =
11738       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11739   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11740   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11741       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11742   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11743   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11744     const Expr *CounterVal = C->getLoopData(I);
11745     assert(CounterVal);
11746     llvm::Value *CntVal = CGF.EmitScalarConversion(
11747         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11748         CounterVal->getExprLoc());
11749     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11750                           /*Volatile=*/false, Int64Ty);
11751   }
11752   llvm::Value *Args[] = {
11753       emitUpdateLocation(CGF, C->getBeginLoc()),
11754       getThreadID(CGF, C->getBeginLoc()),
11755       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11756   llvm::FunctionCallee RTLFn;
11757   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11758     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11759                                                   OMPRTL___kmpc_doacross_post);
11760   } else {
11761     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11762     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11763                                                   OMPRTL___kmpc_doacross_wait);
11764   }
11765   CGF.EmitRuntimeCall(RTLFn, Args);
11766 }
11767 
11768 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11769                                llvm::FunctionCallee Callee,
11770                                ArrayRef<llvm::Value *> Args) const {
11771   assert(Loc.isValid() && "Outlined function call location must be valid.");
11772   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11773 
11774   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11775     if (Fn->doesNotThrow()) {
11776       CGF.EmitNounwindRuntimeCall(Fn, Args);
11777       return;
11778     }
11779   }
11780   CGF.EmitRuntimeCall(Callee, Args);
11781 }
11782 
/// Emit a call to the outlined function \p OutlinedFn with \p Args at
/// location \p Loc. This implementation simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11788 
11789 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11790   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11791     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11792       HasEmittedDeclareTargetRegion = true;
11793 }
11794 
/// Return the address of the parameter \p NativeParam, looked up as a local
/// variable of \p CGF. \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11800 
/// Return the address to use for the local variable \p VD when the OpenMP
/// runtime has to provide its storage, or an invalid address otherwise.
///
/// - A null \p VD yields an invalid address.
/// - If the current function has untied-task data recorded for \p VD, the
///   pair of addresses stored there is picked up.
/// - If the canonical declaration carries an OMPAllocateDeclAttr and
///   isAllocatableDecl() accepts \p VD, a `__kmpc_alloc` call is emitted for
///   the (alignment-rounded) size of the variable, and a cleanup calling
///   `__kmpc_free` is pushed for both the normal and the exceptional exit.
/// - In every other case the untied address (possibly invalid) is returned.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look for addresses recorded for VD in the untied-task data registered
  // for the function being emitted.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time: round it up in the emitted code.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // The size is a compile-time constant: round it up right away.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    // Arguments of the __kmpc_alloc call: thread id, size and allocator.
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the result of the allocation to a pointer to the variable type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied address was recorded, the allocated pointer is stored
    // into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      // Runtime function called by the cleanup.
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to get the thread id.
      unsigned LocEncoding;
      // Address passed to the runtime function.
      Address Addr;
      // Allocator expression, re-evaluated when the cleanup is emitted.
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, address as void pointer, allocator), unless
      // there is no insertion point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // The recorded "real" untied address takes precedence over the freshly
    // allocated one, both for the cleanup and for the returned address.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For a variable of an untied task, let the enclosing OpenMP region (if
    // any) emit its untied switch.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11903 
11904 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11905                                              const VarDecl *VD) const {
11906   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11907   if (It == FunctionToUntiedTaskStackMap.end())
11908     return false;
11909   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11910 }
11911 
11912 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11913     CodeGenModule &CGM, const OMPLoopDirective &S)
11914     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11915   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11916   if (!NeedToPush)
11917     return;
11918   NontemporalDeclsSet &DS =
11919       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11920   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11921     for (const Stmt *Ref : C->private_refs()) {
11922       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11923       const ValueDecl *VD;
11924       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11925         VD = DRE->getDecl();
11926       } else {
11927         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11928         assert((ME->isImplicitCXXThis() ||
11929                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11930                "Expected member of current class.");
11931         VD = ME->getMemberDecl();
11932       }
11933       DS.insert(VD);
11934     }
11935   }
11936 }
11937 
11938 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11939   if (!NeedToPush)
11940     return;
11941   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11942 }
11943 
11944 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11945     CodeGenFunction &CGF,
11946     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11947                          std::pair<Address, Address>> &LocalVars)
11948     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11949   if (!NeedToPush)
11950     return;
11951   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11952       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11953   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11954 }
11955 
11956 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11957   if (!NeedToPush)
11958     return;
11959   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11960 }
11961 
11962 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11963   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11964 
11965   return llvm::any_of(
11966       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11967       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11968 }
11969 
11970 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11971     const OMPExecutableDirective &S,
11972     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11973     const {
11974   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11975   // Vars in target/task regions must be excluded completely.
11976   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11977       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11978     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11979     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11980     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11981     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11982       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11983         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11984     }
11985   }
11986   // Exclude vars in private clauses.
11987   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11988     for (const Expr *Ref : C->varlists()) {
11989       if (!Ref->getType()->isScalarType())
11990         continue;
11991       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11992       if (!DRE)
11993         continue;
11994       NeedToCheckForLPCs.insert(DRE->getDecl());
11995     }
11996   }
11997   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11998     for (const Expr *Ref : C->varlists()) {
11999       if (!Ref->getType()->isScalarType())
12000         continue;
12001       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12002       if (!DRE)
12003         continue;
12004       NeedToCheckForLPCs.insert(DRE->getDecl());
12005     }
12006   }
12007   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12008     for (const Expr *Ref : C->varlists()) {
12009       if (!Ref->getType()->isScalarType())
12010         continue;
12011       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12012       if (!DRE)
12013         continue;
12014       NeedToCheckForLPCs.insert(DRE->getDecl());
12015     }
12016   }
12017   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12018     for (const Expr *Ref : C->varlists()) {
12019       if (!Ref->getType()->isScalarType())
12020         continue;
12021       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12022       if (!DRE)
12023         continue;
12024       NeedToCheckForLPCs.insert(DRE->getDecl());
12025     }
12026   }
12027   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12028     for (const Expr *Ref : C->varlists()) {
12029       if (!Ref->getType()->isScalarType())
12030         continue;
12031       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12032       if (!DRE)
12033         continue;
12034       NeedToCheckForLPCs.insert(DRE->getDecl());
12035     }
12036   }
12037   for (const Decl *VD : NeedToCheckForLPCs) {
12038     for (const LastprivateConditionalData &Data :
12039          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12040       if (Data.DeclToUniqueName.count(VD) > 0) {
12041         if (!Data.Disabled)
12042           NeedToAddForLPCsAsDisabled.insert(VD);
12043         break;
12044       }
12045     }
12046   }
12047 }
12048 
12049 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12050     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12051     : CGM(CGF.CGM),
12052       Action((CGM.getLangOpts().OpenMP >= 50 &&
12053               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12054                            [](const OMPLastprivateClause *C) {
12055                              return C->getKind() ==
12056                                     OMPC_LASTPRIVATE_conditional;
12057                            }))
12058                  ? ActionToDo::PushAsLastprivateConditional
12059                  : ActionToDo::DoNotPush) {
12060   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12061   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12062     return;
12063   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12064          "Expected a push action.");
12065   LastprivateConditionalData &Data =
12066       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12067   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12068     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12069       continue;
12070 
12071     for (const Expr *Ref : C->varlists()) {
12072       Data.DeclToUniqueName.insert(std::make_pair(
12073           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12074           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12075     }
12076   }
12077   Data.IVLVal = IVLVal;
12078   Data.Fn = CGF.CurFn;
12079 }
12080 
12081 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12082     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12083     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12084   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12085   if (CGM.getLangOpts().OpenMP < 50)
12086     return;
12087   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12088   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12089   if (!NeedToAddForLPCsAsDisabled.empty()) {
12090     Action = ActionToDo::DisableLastprivateConditional;
12091     LastprivateConditionalData &Data =
12092         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12093     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12094       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12095     Data.Fn = CGF.CurFn;
12096     Data.Disabled = true;
12097   }
12098 }
12099 
/// Returns a LastprivateConditionalRAII object constructed from \p CGF and
/// \p S through the two-argument constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12105 
12106 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12107   if (CGM.getLangOpts().OpenMP < 50)
12108     return;
12109   if (Action == ActionToDo::DisableLastprivateConditional) {
12110     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12111            "Expected list of disabled private vars.");
12112     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12113   }
12114   if (Action == ActionToDo::PushAsLastprivateConditional) {
12115     assert(
12116         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12117         "Expected list of lastprivate conditional vars.");
12118     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12119   }
12120 }
12121 
12122 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12123                                                         const VarDecl *VD) {
12124   ASTContext &C = CGM.getContext();
12125   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12126   if (I == LastprivateConditionalToTypes.end())
12127     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12128   QualType NewType;
12129   const FieldDecl *VDField;
12130   const FieldDecl *FiredField;
12131   LValue BaseLVal;
12132   auto VI = I->getSecond().find(VD);
12133   if (VI == I->getSecond().end()) {
12134     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12135     RD->startDefinition();
12136     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12137     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12138     RD->completeDefinition();
12139     NewType = C.getRecordType(RD);
12140     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12141     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12142     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12143   } else {
12144     NewType = std::get<0>(VI->getSecond());
12145     VDField = std::get<1>(VI->getSecond());
12146     FiredField = std::get<2>(VI->getSecond());
12147     BaseLVal = std::get<3>(VI->getSecond());
12148   }
12149   LValue FiredLVal =
12150       CGF.EmitLValueForField(BaseLVal, FiredField);
12151   CGF.EmitStoreOfScalar(
12152       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12153       FiredLVal);
12154   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12155 }
12156 
12157 namespace {
12158 /// Checks if the lastprivate conditional variable is referenced in LHS.
12159 class LastprivateConditionalRefChecker final
12160     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12161   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12162   const Expr *FoundE = nullptr;
12163   const Decl *FoundD = nullptr;
12164   StringRef UniqueDeclName;
12165   LValue IVLVal;
12166   llvm::Function *FoundFn = nullptr;
12167   SourceLocation Loc;
12168 
12169 public:
12170   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12171     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12172          llvm::reverse(LPM)) {
12173       auto It = D.DeclToUniqueName.find(E->getDecl());
12174       if (It == D.DeclToUniqueName.end())
12175         continue;
12176       if (D.Disabled)
12177         return false;
12178       FoundE = E;
12179       FoundD = E->getDecl()->getCanonicalDecl();
12180       UniqueDeclName = It->second;
12181       IVLVal = D.IVLVal;
12182       FoundFn = D.Fn;
12183       break;
12184     }
12185     return FoundE == E;
12186   }
12187   bool VisitMemberExpr(const MemberExpr *E) {
12188     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12189       return false;
12190     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12191          llvm::reverse(LPM)) {
12192       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12193       if (It == D.DeclToUniqueName.end())
12194         continue;
12195       if (D.Disabled)
12196         return false;
12197       FoundE = E;
12198       FoundD = E->getMemberDecl()->getCanonicalDecl();
12199       UniqueDeclName = It->second;
12200       IVLVal = D.IVLVal;
12201       FoundFn = D.Fn;
12202       break;
12203     }
12204     return FoundE == E;
12205   }
12206   bool VisitStmt(const Stmt *S) {
12207     for (const Stmt *Child : S->children()) {
12208       if (!Child)
12209         continue;
12210       if (const auto *E = dyn_cast<Expr>(Child))
12211         if (!E->isGLValue())
12212           continue;
12213       if (Visit(Child))
12214         return true;
12215     }
12216     return false;
12217   }
12218   explicit LastprivateConditionalRefChecker(
12219       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12220       : LPM(LPM) {}
12221   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12222   getFoundData() const {
12223     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12224   }
12225 };
12226 } // namespace
12227 
/// Emits code that records the current value of a lastprivate conditional
/// variable.
///
/// Two internal variables are obtained through getOrCreateInternalVariable():
/// one named from \p UniqueDeclName and "iv" that holds the last recorded
/// iteration value, and one named \p UniqueDeclName that holds the last
/// recorded value of the variable. If the stored iteration value is less than
/// or equal to the value loaded from \p IVLVal, both are overwritten with the
/// current iteration value and the value loaded from \p LVal (scalar or
/// complex; aggregates are not supported).
///
/// The update is emitted inside a critical region named \p UniqueDeclName,
/// except when the OpenMPSimd language option is set, in which case it is
/// emitted inline.
///
/// \param Loc source location used for the emitted loads and the critical
/// region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // The comparison follows the signedness of the iteration variable type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12314 
12315 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12316                                                          const Expr *LHS) {
12317   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12318     return;
12319   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12320   if (!Checker.Visit(LHS))
12321     return;
12322   const Expr *FoundE;
12323   const Decl *FoundD;
12324   StringRef UniqueDeclName;
12325   LValue IVLVal;
12326   llvm::Function *FoundFn;
12327   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12328       Checker.getFoundData();
12329   if (FoundFn != CGF.CurFn) {
12330     // Special codegen for inner parallel regions.
12331     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12332     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12333     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12334            "Lastprivate conditional is not found in outer region.");
12335     QualType StructTy = std::get<0>(It->getSecond());
12336     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12337     LValue PrivLVal = CGF.EmitLValue(FoundE);
12338     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12339         PrivLVal.getAddress(CGF),
12340         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12341     LValue BaseLVal =
12342         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12343     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12344     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12345                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12346                         FiredLVal, llvm::AtomicOrdering::Unordered,
12347                         /*IsVolatile=*/true, /*isInit=*/false);
12348     return;
12349   }
12350 
12351   // Private address of the lastprivate conditional in the current context.
12352   // priv_a
12353   LValue LVal = CGF.EmitLValue(FoundE);
12354   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12355                                    FoundE->getExprLoc());
12356 }
12357 
/// Emits deferred lastprivate conditional updates after the region of \p D.
///
/// Takes the innermost non-disabled entry of the lastprivate conditional
/// stack; nothing is emitted if there is none or if it was registered by a
/// different function. For each variable of that entry that is captured by
/// the last capture region of \p D and is not in \p IgnoredDecls, emits a
/// check of the variable's "fired" field and, if it is non-zero, a lastprivate
/// conditional update using the variable's local address.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost entry that is not disabled.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    // Skip variables that are not captured by the region or are ignored.
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    // For a reference variable, update from the referenced object.
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
12407 
12408 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12409     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12410     SourceLocation Loc) {
12411   if (CGF.getLangOpts().OpenMP < 50)
12412     return;
12413   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12414   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12415          "Unknown lastprivate conditional variable.");
12416   StringRef UniqueName = It->second;
12417   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12418   // The variable was not updated in the region - exit.
12419   if (!GV)
12420     return;
12421   LValue LPLVal = CGF.MakeAddrLValue(
12422       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12423   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12424   CGF.EmitStoreOfScalar(Res, PrivLVal);
12425 }
12426 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12432 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12438 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored; \p NumberOfParts is
/// not written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12446 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12454 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12461 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12467 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12472 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12478 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12486 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12493 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12501 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12508 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12514 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12520 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12527 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12533 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12541 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12547 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12553 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12560 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12566 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12571 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12578 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12587 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12594 
/// Asserts that \p Options.SimpleReduction is set and then forwards all
/// arguments unchanged to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12603 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12609 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12615 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12622 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12629 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12634 
/// Not supported in SIMD-only mode: unconditionally hits llvm_unreachable, so
/// it must never be called. All parameters are ignored.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12640 
/// SIMD-only stub for emitting a cancel call.
///
/// All parameters (including \p IfCond and \p CancelRegion) are ignored; the
/// body is a bare llvm_unreachable, so this entry point is not expected to be
/// called on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12646 
/// SIMD-only stub for emitting a target outlined function.
///
/// The reference out-parameters \p OutlinedFn and \p OutlinedFnID are never
/// written: every parameter is ignored and the body is a bare
/// llvm_unreachable, so this entry point is not expected to be called on a
/// CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12653 
/// SIMD-only stub for emitting a target call.
///
/// Every parameter is ignored and the \p SizeEmitter callback is never
/// invoked; the body is a bare llvm_unreachable, so this entry point is not
/// expected to be called on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12663 
/// SIMD-only stub for emitting target functions for \p GD.
///
/// No boolean is ever returned: \p GD is ignored and the body is a bare
/// llvm_unreachable, so this entry point is not expected to be called on a
/// CGOpenMPSIMDRuntime.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12667 
/// SIMD-only stub for emitting a target global variable for \p GD.
///
/// No boolean is ever returned: \p GD is ignored and the body is a bare
/// llvm_unreachable, so this entry point is not expected to be called on a
/// CGOpenMPSIMDRuntime.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12671 
/// SIMD-only handling of a target global: does nothing with \p GD.
///
/// Unlike the neighbouring SIMD-only stubs, this one is callable: it ignores
/// \p GD and unconditionally yields false.
/// NOTE(review): false presumably tells the caller that the declaration was
/// not handled here and should go through regular emission — confirm against
/// the contract documented for CGOpenMPRuntime::emitTargetGlobal.
///
/// \returns Always false.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12675 
/// SIMD-only stub for emitting a teams call.
///
/// All parameters (including \p OutlinedFn and \p CapturedVars) are ignored;
/// the body is a bare llvm_unreachable, so this entry point is not expected to
/// be called on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12683 
/// SIMD-only stub for emitting the num_teams clause.
///
/// All parameters (including \p NumTeams and \p ThreadLimit) are ignored; the
/// body is a bare llvm_unreachable, so this entry point is not expected to be
/// called on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12690 
/// SIMD-only stub for emitting target data calls.
///
/// Every parameter is ignored: \p CodeGen is never run and \p Info is never
/// modified. The body is a bare llvm_unreachable, so this entry point is not
/// expected to be called on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12696 
/// SIMD-only stub for emitting a standalone target data call.
///
/// All parameters (including \p IfCond and \p Device) are ignored; the body is
/// a bare llvm_unreachable, so this entry point is not expected to be called
/// on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12702 
/// SIMD-only stub for doacross initialization.
///
/// All parameters (including \p NumIterations) are ignored; the body is a bare
/// llvm_unreachable, so this entry point is not expected to be called on a
/// CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12708 
/// SIMD-only stub for emitting doacross ordered code for depend clause \p C.
///
/// Both parameters are ignored; the body is a bare llvm_unreachable, so this
/// entry point is not expected to be called on a CGOpenMPSIMDRuntime.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12713 
/// SIMD-only stub for translating an outlined-function parameter.
///
/// No declaration is ever returned: \p FD and \p NativeParam are ignored and
/// the body is a bare llvm_unreachable, so this entry point is not expected to
/// be called on a CGOpenMPSIMDRuntime.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12719 
/// SIMD-only stub for obtaining the address of a parameter.
///
/// No Address is ever produced: \p CGF, \p NativeParam and \p TargetParam are
/// ignored and the body is a bare llvm_unreachable, so this entry point is not
/// expected to be called on a CGOpenMPSIMDRuntime.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12726