1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
443 /// Values for bit flags used in the ident_t to describe the fields.
444 /// All enumeric elements are named and described in accordance with the code
445 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
446 enum OpenMPLocationFlags : unsigned {
447   /// Use trampoline for internal microtask.
448   OMP_IDENT_IMD = 0x01,
449   /// Use c-style ident structure.
450   OMP_IDENT_KMPC = 0x02,
451   /// Atomic reduction option for kmpc_reduce.
452   OMP_ATOMIC_REDUCE = 0x10,
453   /// Explicit 'barrier' directive.
454   OMP_IDENT_BARRIER_EXPL = 0x20,
455   /// Implicit barrier in code.
456   OMP_IDENT_BARRIER_IMPL = 0x40,
457   /// Implicit barrier in 'for' directive.
458   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459   /// Implicit barrier in 'sections' directive.
460   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461   /// Implicit barrier in 'single' directive.
462   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463   /// Call of __kmp_for_static_init for static loop.
464   OMP_IDENT_WORK_LOOP = 0x200,
465   /// Call of __kmp_for_static_init for sections.
466   OMP_IDENT_WORK_SECTIONS = 0x400,
467   /// Call of __kmp_for_static_init for distribute.
468   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470 };
471 
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474 /// Values for bit flags for marking which requires clauses have been used.
475 enum OpenMPOffloadingRequiresDirFlags : int64_t {
476   /// flag undefined.
477   OMP_REQ_UNDEFINED               = 0x000,
478   /// no requires clause present.
479   OMP_REQ_NONE                    = 0x001,
480   /// reverse_offload clause.
481   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482   /// unified_address clause.
483   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484   /// unified_shared_memory clause.
485   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486   /// dynamic_allocators clause.
487   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489 };
490 
/// Reserved device ids used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device id to use when no device was defined; the runtime should obtain
  /// it from the environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
496 } // anonymous namespace
497 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538 
/// Schedule types for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default; an alias of the unordered static schedule.
  OMP_sch_default = OMP_sch_static,

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
663 /// Emit initialization of arrays of complex types.
664 /// \param DestAddr Address of the array.
665 /// \param Type Type of array.
666 /// \param Init Initial expression of array.
667 /// \param SrcAddr Address of the original array.
668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669                                  QualType Type, bool EmitDeclareReductionInit,
670                                  const Expr *Init,
671                                  const OMPDeclareReductionDecl *DRD,
672                                  Address SrcAddr = Address::invalid()) {
673   // Perform element-by-element initialization.
674   QualType ElementTy;
675 
676   // Drill down to the base element type on both arrays.
677   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679   DestAddr =
680       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681   if (DRD)
682     SrcAddr =
683         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684 
685   llvm::Value *SrcBegin = nullptr;
686   if (DRD)
687     SrcBegin = SrcAddr.getPointer();
688   llvm::Value *DestBegin = DestAddr.getPointer();
689   // Cast from pointer to array type to pointer to single element.
690   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691   // The basic structure here is a while-do loop.
692   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
694   llvm::Value *IsEmpty =
695       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697 
698   // Enter the loop body, making that address the current address.
699   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700   CGF.EmitBlock(BodyBB);
701 
702   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703 
704   llvm::PHINode *SrcElementPHI = nullptr;
705   Address SrcElementCurrent = Address::invalid();
706   if (DRD) {
707     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708                                           "omp.arraycpy.srcElementPast");
709     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710     SrcElementCurrent =
711         Address(SrcElementPHI,
712                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713   }
714   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716   DestElementPHI->addIncoming(DestBegin, EntryBB);
717   Address DestElementCurrent =
718       Address(DestElementPHI,
719               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720 
721   // Emit copy.
722   {
723     CodeGenFunction::RunCleanupsScope InitScope(CGF);
724     if (EmitDeclareReductionInit) {
725       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726                                        SrcElementCurrent, ElementTy);
727     } else
728       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729                            /*IsInitializer=*/false);
730   }
731 
732   if (DRD) {
733     // Shift the address forward by one element.
734     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737   }
738 
739   // Shift the address forward by one element.
740   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742   // Check whether we've reached the end.
743   llvm::Value *Done =
744       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747 
748   // Done.
749   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750 }
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
/// Emits the initialization of the private copy of reduction item \p N.
///
/// \param CGF         Function being emitted.
/// \param N           Index of the reduction item; its shared lvalue must
///                    already have been generated.
/// \param PrivateAddr Address of the private copy; it is re-cast to the memory
///                    type of the private variable.
/// \param SharedLVal  Lvalue of the shared item; it is rebuilt with the type,
///                    base info and TBAA info of the recorded shared lvalue.
/// \param DefaultInit Callback performing the default initialization. Its
///                    boolean result is only consulted on the last branch
///                    below.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Null unless getReductionInit finds a declare-reduction declaration for the
  // reduction operation.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: run the default initialization first only when the
    // declare-reduction has an initializer, then initialize element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item handled through the declare-reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returned false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
/// Wraps the raw pointer \p Addr so that it can stand in for a base of type
/// \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy (stopping once the type is
/// the same as \p ElTy) a memory temporary is created, and each temporary is
/// stored into the one created before it, forming a chain. \p Addr is cast to
/// the element type of the innermost temporary and stored there; the outermost
/// temporary is returned.
///
/// When \p BaseTy has no such level, \p Addr is cast to \p BaseLVType and
/// returned directly with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: innermost (most recently created) temporary.
  // TopTmp: temporary created on the previous iteration.
  // MostTopTmp: first (outermost) temporary; this is what the caller gets.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the previous one, or remember it as the
    // outermost one if it is the first.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Pick the cast target: the innermost temporary's element type if any
  // temporary was created, the caller-provided type otherwise.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
/// Constructs the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Separator stored for use before the first name part
///                       (see getName).
/// \param Separator      Separator stored for use before every later name
///                       part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emits the internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction.
///
/// \param CGM                 Module the function is added to.
/// \param Ty                  Reduction type; both parameters are restrict
///                            pointers to it.
/// \param CombinerInitializer Expression emitted (and its value ignored) as
///                            the function body; may be null.
/// \param In                  Variable mapped to the pointee of the second
///                            parameter.
/// \param Out                 Variable mapped to the pointee of the first
///                            parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise
///                            "omp_initializer" is used and \p Out's own
///                            non-trivial initializer, if any, is emitted
///                            first.
/// \returns The newly created function with internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note: the 'out' parameter is pushed first, then the 'in' parameter.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimizations enabled, request that the helper always be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helper only: emit the non-trivial initializer of Out into the
  // address Out was privatized to above.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback for an OMPD_parallel region
// onto the builder; the destructor pops it again. Both are no-ops when the
// builder pointer is null.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // The callback captures CGF by reference.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Restore the builder's previous insertion point when leaving the
      // callback.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder whose finalization stack is managed; may be null.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
/// Emits the outlined function for a task or taskloop directive \p D.
///
/// \param ThreadIDVar   Thread-id variable; must NOT have pointer type.
/// \param PartIDVar     Part-id variable handed to the untied-task action.
/// \param TaskTVar      Variable holding a pointer to the task descriptor; it
///                      is loaded when the untied-task runtime call is
///                      emitted.
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param CodeGen       Body generator; the untied-task action is attached to
///                      it.
/// \param Tied          Whether the task is tied.
/// \param NumberOfParts [out] Written only for untied tasks, with the number
///                      of parts reported by the action.
/// \returns The generated captured-statement function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits a call to __kmpc_omp_task(loc, thread_id, task) where the task
  // pointer is loaded through TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // The cancel flag is taken from the directive for the kinds listed below;
  // for any other kind it stays false.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Return the OpenMP global thread id for the function currently being emitted
/// by \p CGF.
///
/// The lookup proceeds in this order:
///  - if LangOpts.OpenMPIRBuilder is set, delegate to the OpenMPIRBuilder;
///  - reuse a thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap;
///  - inside an OpenMP region that has a thread id variable, load the id from
///    that variable when one of the conditions checked below holds;
///  - otherwise emit a call to __kmpc_global_thread_num at the service insert
///    point and cache the result for the rest of the function.
///
/// \param CGF Code generation state of the current function.
/// \param Loc Source location used for the emitted load / ident_t argument.
/// \returns The llvm::Value holding the thread id.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    // Point the IR builder at clang's current insertion point first.
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that holds the alloca insertion point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable if any of the following holds:
      //  - no landing pad is required, or (C++) exceptions are disabled;
      //  - we are currently emitting into TopBlock;
      //  - the variable's pointer is not an instruction;
      //  - the pointer is an instruction located in TopBlock or in the block
      //    we are currently emitting into.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insert point the call is placed at.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard restores the builder's insertion point when it goes out of
  // scope, so the caller continues emitting where it left off.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
/// Emit the constructor/destructor helper functions for the threadprivate
/// variable \p VD and the code that registers them with the OpenMP runtime.
///
/// Nothing is emitted (and nullptr is returned) when TLS is requested and
/// supported by the target, when \p VD has no definition, when the variable
/// was already processed (tracked by mangled name in
/// ThreadPrivateWithDefinition), or when neither a constructor nor a
/// destructor is needed.
///
/// \param VD The threadprivate variable.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit Whether a constructor helper re-emitting the
/// initializer should be generated (only honoured in C++).
/// \param CGF If non-null, the registration code is emitted into this
/// function; otherwise a dedicated "__omp_threadprivate_init_" function is
/// created.
/// \returns The newly created init function when \p CGF is null and
/// registration was emitted; nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only for a variable with a definition that has not been handled
  // before (insert() reports whether the name was newly added).
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check; this
    // assumes PerformInit is only set when VD has an initializer - confirm.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed void* parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as a pointer to the variable's memory type,
      // using the alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Use an empty debug location while the function is being started.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of ctor/dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration in a new
    // nullary init function and hand that back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869 
/// Emit and register the offload entries for the constructor and destructor
/// of the declare target variable \p VD.
///
/// On the device side real "<prefix>_ctor" / "<prefix>_dtor" functions are
/// generated that initialize / destroy the variable at \p Addr. On the host
/// side a private constant i8 global with the same name is created instead
/// and used as the entry. In both cases the entry is registered with
/// OffloadEntriesInfoManager.
///
/// \param VD The declare target variable.
/// \param Addr Global variable holding \p VD.
/// \param PerformInit Whether a constructor entry should be emitted (only
/// honoured in C++).
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of LangOpts.OpenMPIsDevice.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for non-declare-target variables, 'link' variables, or
  // 'to' variables under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Prefix: "__omp_offloading_" "_<device id>" "_<file id>_" <name> "_l"
    // <line>, with the IDs printed in hexadecimal.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced on the device path below without a
  // null check; this assumes PerformInit implies an initializer - confirm.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the variable at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Use an empty debug location while the function is being started, then
      // an artificial one for its body.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the function as used so it is kept in the module.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant zero i8 global stands in for the
      // constructor and doubles as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the variable at
      // Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the function as used so it is kept in the module.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant zero i8 global stands in for the
      // destructor and doubles as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
/// Emit the code that launches a 'parallel' region through \p OutlinedFn.
///
/// \param CGF          Function being emitted into.
/// \param Loc          Source location used to build the runtime location
///                     argument and to look up the thread ID.
/// \param OutlinedFn   The outlined function implementing the region body.
/// \param CapturedVars Values forwarded to \p OutlinedFn after its leading
///                     arguments.
/// \param IfCond       Condition of the 'if' clause, or null if absent.
///
/// When \p IfCond is null only the parallel path (__kmpc_fork_call) is
/// emitted. Otherwise emitIfClause() chooses between the parallel path and a
/// serialized path that calls \p OutlinedFn directly, bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location value is created once here and shared by both lambdas.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function to the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed leading arguments followed by the captured variables.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: call the outlined function directly on this thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // Temporary i32 initialized to 0 whose address is the second argument.
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // Leading arguments: address of the thread ID, then address of the
    // zero-initialized bound temporary; the captured variables follow.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    // Note: a fresh location value is emitted here instead of reusing RTLoc.
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: the region always runs through the parallel path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2124 
2125 // If we're inside an (outlined) parallel region, use the region info's
2126 // thread-ID variable (it is passed in a first argument of the outlined function
2127 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2128 // regular serial code region, get thread ID by calling kmp_int32
2129 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2130 // return the address of that temp.
2131 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2132                                              SourceLocation Loc) {
2133   if (auto *OMPRegionInfo =
2134           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2135     if (OMPRegionInfo->getThreadIDVariable())
2136       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2137 
2138   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2139   QualType Int32Ty =
2140       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2141   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2142   CGF.EmitStoreOfScalar(ThreadID,
2143                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2144 
2145   return ThreadIDTemp;
2146 }
2147 
2148 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2149     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2150   SmallString<256> Buffer;
2151   llvm::raw_svector_ostream Out(Buffer);
2152   Out << Name;
2153   StringRef RuntimeName = Out.str();
2154   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2155   if (Elem.second) {
2156     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2157            "OMP internal variable has different type than requested");
2158     return &*Elem.second;
2159   }
2160 
2161   return Elem.second = new llvm::GlobalVariable(
2162              CGM.getModule(), Ty, /*IsConstant*/ false,
2163              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2164              Elem.first(), /*InsertBefore=*/nullptr,
2165              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2166 }
2167 
2168 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2169   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2170   std::string Name = getName({Prefix, "var"});
2171   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2172 }
2173 
2174 namespace {
2175 /// Common pre(post)-action for different OpenMP constructs.
2176 class CommonActionTy final : public PrePostActionTy {
2177   llvm::FunctionCallee EnterCallee;
2178   ArrayRef<llvm::Value *> EnterArgs;
2179   llvm::FunctionCallee ExitCallee;
2180   ArrayRef<llvm::Value *> ExitArgs;
2181   bool Conditional;
2182   llvm::BasicBlock *ContBlock = nullptr;
2183 
2184 public:
2185   CommonActionTy(llvm::FunctionCallee EnterCallee,
2186                  ArrayRef<llvm::Value *> EnterArgs,
2187                  llvm::FunctionCallee ExitCallee,
2188                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2189       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2190         ExitArgs(ExitArgs), Conditional(Conditional) {}
2191   void Enter(CodeGenFunction &CGF) override {
2192     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2193     if (Conditional) {
2194       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2195       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2196       ContBlock = CGF.createBasicBlock("omp_if.end");
2197       // Generate the branch (If-stmt)
2198       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2199       CGF.EmitBlock(ThenBlock);
2200     }
2201   }
2202   void Done(CodeGenFunction &CGF) {
2203     // Emit the rest of blocks/branches
2204     CGF.EmitBranch(ContBlock);
2205     CGF.EmitBlock(ContBlock, true);
2206   }
2207   void Exit(CodeGenFunction &CGF) override {
2208     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2209   }
2210 };
2211 } // anonymous namespace
2212 
2213 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2214                                          StringRef CriticalName,
2215                                          const RegionCodeGenTy &CriticalOpGen,
2216                                          SourceLocation Loc, const Expr *Hint) {
2217   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2218   // CriticalOpGen();
2219   // __kmpc_end_critical(ident_t *, gtid, Lock);
2220   // Prepare arguments and build a call to __kmpc_critical
2221   if (!CGF.HaveInsertPoint())
2222     return;
2223   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2224                          getCriticalRegionLock(CriticalName)};
2225   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2226                                                 std::end(Args));
2227   if (Hint) {
2228     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2229         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2230   }
2231   CommonActionTy Action(
2232       OMPBuilder.getOrCreateRuntimeFunction(
2233           CGM.getModule(),
2234           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2235       EnterArgs,
2236       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2237                                             OMPRTL___kmpc_end_critical),
2238       Args);
2239   CriticalOpGen.setAction(Action);
2240   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2241 }
2242 
2243 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2244                                        const RegionCodeGenTy &MasterOpGen,
2245                                        SourceLocation Loc) {
2246   if (!CGF.HaveInsertPoint())
2247     return;
2248   // if(__kmpc_master(ident_t *, gtid)) {
2249   //   MasterOpGen();
2250   //   __kmpc_end_master(ident_t *, gtid);
2251   // }
2252   // Prepare arguments and build a call to __kmpc_master
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2254   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2255                             CGM.getModule(), OMPRTL___kmpc_master),
2256                         Args,
2257                         OMPBuilder.getOrCreateRuntimeFunction(
2258                             CGM.getModule(), OMPRTL___kmpc_end_master),
2259                         Args,
2260                         /*Conditional=*/true);
2261   MasterOpGen.setAction(Action);
2262   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2263   Action.Done(CGF);
2264 }
2265 
2266 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2267                                         SourceLocation Loc) {
2268   if (!CGF.HaveInsertPoint())
2269     return;
2270   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2271     OMPBuilder.createTaskyield(CGF.Builder);
2272   } else {
2273     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2274     llvm::Value *Args[] = {
2275         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2276         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2277     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2278                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2279                         Args);
2280   }
2281 
2282   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2283     Region->emitUntiedSwitch(CGF);
2284 }
2285 
2286 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2287                                           const RegionCodeGenTy &TaskgroupOpGen,
2288                                           SourceLocation Loc) {
2289   if (!CGF.HaveInsertPoint())
2290     return;
2291   // __kmpc_taskgroup(ident_t *, gtid);
2292   // TaskgroupOpGen();
2293   // __kmpc_end_taskgroup(ident_t *, gtid);
2294   // Prepare arguments and build a call to __kmpc_taskgroup
2295   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2298                         Args,
2299                         OMPBuilder.getOrCreateRuntimeFunction(
2300                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2301                         Args);
2302   TaskgroupOpGen.setAction(Action);
2303   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2304 }
2305 
2306 /// Given an array of pointers to variables, project the address of a
2307 /// given variable.
2308 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2309                                       unsigned Index, const VarDecl *Var) {
2310   // Pull out the pointer to the variable.
2311   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2312   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2313 
2314   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2315   Addr = CGF.Builder.CreateElementBitCast(
2316       Addr, CGF.ConvertTypeForMem(Var->getType()));
2317   return Addr;
2318 }
2319 
/// Build the internal helper "void copy_func(void *LHSArg, void *RHSArg)"
/// used for 'copyprivate' handling.
///
/// Both arguments of the generated function are cast to \p ArgsType and
/// treated as arrays of pointers to variables. For every index I in
/// \p AssignmentOps, the variable reached through slot I of the RHS array is
/// copied to the variable reached through slot I of the LHS array with
/// EmitOMPCopy, using the type of CopyprivateVars[I] and AssignmentOps[I].
///
/// \param CGM             Module the function is added to.
/// \param ArgsType        Pointer type both arguments are cast to.
/// \param CopyprivateVars Expressions referring to the copyprivate variables;
///                        each must be a DeclRefExpr.
/// \param DestExprs       Per-variable destination expressions; each must be a
///                        DeclRefExpr to a VarDecl.
/// \param SrcExprs        Per-variable source expressions; each must be a
///                        DeclRefExpr to a VarDecl.
/// \param AssignmentOps   Per-variable copy expressions; its size determines
///                        the number of copies emitted.
/// \param Loc             Location used for the implicit parameters and the
///                        function.
/// \returns the newly created function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function name is composed by the runtime's getName() from these parts.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The body is emitted with its own CodeGenFunction instance.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination: slot I of the LHS array, typed after the dest variable.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Source: slot I of the RHS array, typed after the source variable.
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy uses the type of the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2373 
/// Emit a 'single' region whose body is produced by \p SingleOpGen, plus the
/// 'copyprivate' broadcast when \p CopyprivateVars is not empty.
///
/// The body is guarded by the result of __kmpc_single and followed, inside
/// the guard, by __kmpc_end_single. With copyprivate variables, a "did_it"
/// flag records whether the guarded body was executed, and
/// __kmpc_copyprivate is called afterwards with the list of variable
/// addresses, a generated copy function and that flag.
///
/// \p CopyprivateVars, \p SrcExprs, \p DstExprs and \p AssignmentOps must all
/// have the same size (asserted). Nothing is emitted when \p CGF has no
/// insertion point.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what enables the copyprivate-specific code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the guarded block.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block opened by the action's entry code.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // void *cpr_list[<number of copyprivate variables>];
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // cpr_list[I] = (void *)&<copyprivate variable I>;
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the DestExprs parameter position of
    // emitCopyprivateCopyFunction and DstExprs in its SrcExprs position -
    // confirm this ordering is intentional.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // This array shadows the outer Args used for the __kmpc_single calls.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2461 
2462 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2463                                         const RegionCodeGenTy &OrderedOpGen,
2464                                         SourceLocation Loc, bool IsThreads) {
2465   if (!CGF.HaveInsertPoint())
2466     return;
2467   // __kmpc_ordered(ident_t *, gtid);
2468   // OrderedOpGen();
2469   // __kmpc_end_ordered(ident_t *, gtid);
2470   // Prepare arguments and build a call to __kmpc_ordered
2471   if (IsThreads) {
2472     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2473     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2474                               CGM.getModule(), OMPRTL___kmpc_ordered),
2475                           Args,
2476                           OMPBuilder.getOrCreateRuntimeFunction(
2477                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2478                           Args);
2479     OrderedOpGen.setAction(Action);
2480     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2481     return;
2482   }
2483   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2484 }
2485 
2486 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2487   unsigned Flags;
2488   if (Kind == OMPD_for)
2489     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2490   else if (Kind == OMPD_sections)
2491     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2492   else if (Kind == OMPD_single)
2493     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2494   else if (Kind == OMPD_barrier)
2495     Flags = OMP_IDENT_BARRIER_EXPL;
2496   else
2497     Flags = OMP_IDENT_BARRIER_IMPL;
2498   return Flags;
2499 }
2500 
2501 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2502     CodeGenFunction &CGF, const OMPLoopDirective &S,
2503     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2504   // Check if the loop directive is actually a doacross loop directive. In this
2505   // case choose static, 1 schedule.
2506   if (llvm::any_of(
2507           S.getClausesOfKind<OMPOrderedClause>(),
2508           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2509     ScheduleKind = OMPC_SCHEDULE_static;
2510     // Chunk size is 1 in this case.
2511     llvm::APInt ChunkSize(32, 1);
2512     ChunkExpr = IntegerLiteral::Create(
2513         CGF.getContext(), ChunkSize,
2514         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2515         SourceLocation());
2516   }
2517 }
2518 
2519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2520                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2521                                       bool ForceSimpleCall) {
2522   // Check if we should use the OMPBuilder
2523   auto *OMPRegionInfo =
2524       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2525   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2526     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2527         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2528     return;
2529   }
2530 
2531   if (!CGF.HaveInsertPoint())
2532     return;
2533   // Build call __kmpc_cancel_barrier(loc, thread_id);
2534   // Build call __kmpc_barrier(loc, thread_id);
2535   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2536   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2537   // thread_id);
2538   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2539                          getThreadID(CGF, Loc)};
2540   if (OMPRegionInfo) {
2541     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2542       llvm::Value *Result = CGF.EmitRuntimeCall(
2543           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2544                                                 OMPRTL___kmpc_cancel_barrier),
2545           Args);
2546       if (EmitChecks) {
2547         // if (__kmpc_cancel_barrier()) {
2548         //   exit from construct;
2549         // }
2550         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2551         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2552         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2553         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2554         CGF.EmitBlock(ExitBB);
2555         //   exit from construct;
2556         CodeGenFunction::JumpDest CancelDestination =
2557             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2558         CGF.EmitBranchThroughCleanup(CancelDestination);
2559         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2560       }
2561       return;
2562     }
2563   }
2564   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2565                           CGM.getModule(), OMPRTL___kmpc_barrier),
2566                       Args);
2567 }
2568 
2569 /// Map the OpenMP loop schedule to the runtime enumeration.
2570 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2571                                           bool Chunked, bool Ordered) {
2572   switch (ScheduleKind) {
2573   case OMPC_SCHEDULE_static:
2574     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2575                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2576   case OMPC_SCHEDULE_dynamic:
2577     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2578   case OMPC_SCHEDULE_guided:
2579     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2580   case OMPC_SCHEDULE_runtime:
2581     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2582   case OMPC_SCHEDULE_auto:
2583     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2584   case OMPC_SCHEDULE_unknown:
2585     assert(!Chunked && "chunk was specified but schedule kind not known");
2586     return Ordered ? OMP_ord_static : OMP_sch_static;
2587   }
2588   llvm_unreachable("Unexpected runtime schedule");
2589 }
2590 
2591 /// Map the OpenMP distribute schedule to the runtime enumeration.
2592 static OpenMPSchedType
2593 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2594   // only static is allowed for dist_schedule
2595   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2596 }
2597 
2598 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2599                                          bool Chunked) const {
2600   OpenMPSchedType Schedule =
2601       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2602   return Schedule == OMP_sch_static;
2603 }
2604 
2605 bool CGOpenMPRuntime::isStaticNonchunked(
2606     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2607   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2608   return Schedule == OMP_dist_sch_static;
2609 }
2610 
2611 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2612                                       bool Chunked) const {
2613   OpenMPSchedType Schedule =
2614       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2615   return Schedule == OMP_sch_static_chunked;
2616 }
2617 
2618 bool CGOpenMPRuntime::isStaticChunked(
2619     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2620   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2621   return Schedule == OMP_dist_sch_static_chunked;
2622 }
2623 
2624 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2625   OpenMPSchedType Schedule =
2626       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2627   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2628   return Schedule != OMP_sch_static;
2629 }
2630 
2631 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2632                                   OpenMPScheduleClauseModifier M1,
2633                                   OpenMPScheduleClauseModifier M2) {
2634   int Modifier = 0;
2635   switch (M1) {
2636   case OMPC_SCHEDULE_MODIFIER_monotonic:
2637     Modifier = OMP_sch_modifier_monotonic;
2638     break;
2639   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2640     Modifier = OMP_sch_modifier_nonmonotonic;
2641     break;
2642   case OMPC_SCHEDULE_MODIFIER_simd:
2643     if (Schedule == OMP_sch_static_chunked)
2644       Schedule = OMP_sch_static_balanced_chunked;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_last:
2647   case OMPC_SCHEDULE_MODIFIER_unknown:
2648     break;
2649   }
2650   switch (M2) {
2651   case OMPC_SCHEDULE_MODIFIER_monotonic:
2652     Modifier = OMP_sch_modifier_monotonic;
2653     break;
2654   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2655     Modifier = OMP_sch_modifier_nonmonotonic;
2656     break;
2657   case OMPC_SCHEDULE_MODIFIER_simd:
2658     if (Schedule == OMP_sch_static_chunked)
2659       Schedule = OMP_sch_static_balanced_chunked;
2660     break;
2661   case OMPC_SCHEDULE_MODIFIER_last:
2662   case OMPC_SCHEDULE_MODIFIER_unknown:
2663     break;
2664   }
2665   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2666   // If the static schedule kind is specified or if the ordered clause is
2667   // specified, and if the nonmonotonic modifier is not specified, the effect is
2668   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2669   // modifier is specified, the effect is as if the nonmonotonic modifier is
2670   // specified.
2671   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2672     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2673           Schedule == OMP_sch_static_balanced_chunked ||
2674           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2675           Schedule == OMP_dist_sch_static_chunked ||
2676           Schedule == OMP_dist_sch_static))
2677       Modifier = OMP_sch_modifier_nonmonotonic;
2678   }
2679   return Schedule | Modifier;
2680 }
2681 
2682 void CGOpenMPRuntime::emitForDispatchInit(
2683     CodeGenFunction &CGF, SourceLocation Loc,
2684     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2685     bool Ordered, const DispatchRTInput &DispatchValues) {
2686   if (!CGF.HaveInsertPoint())
2687     return;
2688   OpenMPSchedType Schedule = getRuntimeSchedule(
2689       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2690   assert(Ordered ||
2691          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2692           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2693           Schedule != OMP_sch_static_balanced_chunked));
2694   // Call __kmpc_dispatch_init(
2695   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2696   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2697   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2698 
2699   // If the Chunk was not specified in the clause - use default value 1.
2700   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2701                                             : CGF.Builder.getIntN(IVSize, 1);
2702   llvm::Value *Args[] = {
2703       emitUpdateLocation(CGF, Loc),
2704       getThreadID(CGF, Loc),
2705       CGF.Builder.getInt32(addMonoNonMonoModifier(
2706           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2707       DispatchValues.LB,                                     // Lower
2708       DispatchValues.UB,                                     // Upper
2709       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2710       Chunk                                                  // Chunk
2711   };
2712   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2713 }
2714 
2715 static void emitForStaticInitCall(
2716     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2717     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2718     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2719     const CGOpenMPRuntime::StaticRTInput &Values) {
2720   if (!CGF.HaveInsertPoint())
2721     return;
2722 
2723   assert(!Values.Ordered);
2724   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2725          Schedule == OMP_sch_static_balanced_chunked ||
2726          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2727          Schedule == OMP_dist_sch_static ||
2728          Schedule == OMP_dist_sch_static_chunked);
2729 
2730   // Call __kmpc_for_static_init(
2731   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2732   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2733   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2734   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2735   llvm::Value *Chunk = Values.Chunk;
2736   if (Chunk == nullptr) {
2737     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2738             Schedule == OMP_dist_sch_static) &&
2739            "expected static non-chunked schedule");
2740     // If the Chunk was not specified in the clause - use default value 1.
2741     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2742   } else {
2743     assert((Schedule == OMP_sch_static_chunked ||
2744             Schedule == OMP_sch_static_balanced_chunked ||
2745             Schedule == OMP_ord_static_chunked ||
2746             Schedule == OMP_dist_sch_static_chunked) &&
2747            "expected static chunked schedule");
2748   }
2749   llvm::Value *Args[] = {
2750       UpdateLocation,
2751       ThreadId,
2752       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2753                                                   M2)), // Schedule type
2754       Values.IL.getPointer(),                           // &isLastIter
2755       Values.LB.getPointer(),                           // &LB
2756       Values.UB.getPointer(),                           // &UB
2757       Values.ST.getPointer(),                           // &Stride
2758       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2759       Chunk                                             // Chunk
2760   };
2761   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2762 }
2763 
2764 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2765                                         SourceLocation Loc,
2766                                         OpenMPDirectiveKind DKind,
2767                                         const OpenMPScheduleTy &ScheduleKind,
2768                                         const StaticRTInput &Values) {
2769   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2770       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2771   assert(isOpenMPWorksharingDirective(DKind) &&
2772          "Expected loop-based or sections-based directive.");
2773   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2774                                              isOpenMPLoopDirective(DKind)
2775                                                  ? OMP_IDENT_WORK_LOOP
2776                                                  : OMP_IDENT_WORK_SECTIONS);
2777   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2778   llvm::FunctionCallee StaticInitFunction =
2779       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2780   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2781   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2782                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2783 }
2784 
2785 void CGOpenMPRuntime::emitDistributeStaticInit(
2786     CodeGenFunction &CGF, SourceLocation Loc,
2787     OpenMPDistScheduleClauseKind SchedKind,
2788     const CGOpenMPRuntime::StaticRTInput &Values) {
2789   OpenMPSchedType ScheduleNum =
2790       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2791   llvm::Value *UpdatedLocation =
2792       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2793   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2794   llvm::FunctionCallee StaticInitFunction =
2795       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2796   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2797                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2798                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2799 }
2800 
2801 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2802                                           SourceLocation Loc,
2803                                           OpenMPDirectiveKind DKind) {
2804   if (!CGF.HaveInsertPoint())
2805     return;
2806   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2807   llvm::Value *Args[] = {
2808       emitUpdateLocation(CGF, Loc,
2809                          isOpenMPDistributeDirective(DKind)
2810                              ? OMP_IDENT_WORK_DISTRIBUTE
2811                              : isOpenMPLoopDirective(DKind)
2812                                    ? OMP_IDENT_WORK_LOOP
2813                                    : OMP_IDENT_WORK_SECTIONS),
2814       getThreadID(CGF, Loc)};
2815   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2816   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2817                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2818                       Args);
2819 }
2820 
2821 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2822                                                  SourceLocation Loc,
2823                                                  unsigned IVSize,
2824                                                  bool IVSigned) {
2825   if (!CGF.HaveInsertPoint())
2826     return;
2827   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2828   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2829   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2830 }
2831 
2832 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2833                                           SourceLocation Loc, unsigned IVSize,
2834                                           bool IVSigned, Address IL,
2835                                           Address LB, Address UB,
2836                                           Address ST) {
2837   // Call __kmpc_dispatch_next(
2838   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2839   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2840   //          kmp_int[32|64] *p_stride);
2841   llvm::Value *Args[] = {
2842       emitUpdateLocation(CGF, Loc),
2843       getThreadID(CGF, Loc),
2844       IL.getPointer(), // &isLastIter
2845       LB.getPointer(), // &Lower
2846       UB.getPointer(), // &Upper
2847       ST.getPointer()  // &Stride
2848   };
2849   llvm::Value *Call =
2850       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2851   return CGF.EmitScalarConversion(
2852       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2853       CGF.getContext().BoolTy, Loc);
2854 }
2855 
2856 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2857                                            llvm::Value *NumThreads,
2858                                            SourceLocation Loc) {
2859   if (!CGF.HaveInsertPoint())
2860     return;
2861   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2862   llvm::Value *Args[] = {
2863       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2864       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2865   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2866                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2867                       Args);
2868 }
2869 
2870 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2871                                          ProcBindKind ProcBind,
2872                                          SourceLocation Loc) {
2873   if (!CGF.HaveInsertPoint())
2874     return;
2875   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2876   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2877   llvm::Value *Args[] = {
2878       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2879       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2880   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2881                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2882                       Args);
2883 }
2884 
2885 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2886                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2887   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2888     OMPBuilder.createFlush(CGF.Builder);
2889   } else {
2890     if (!CGF.HaveInsertPoint())
2891       return;
2892     // Build call void __kmpc_flush(ident_t *loc)
2893     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2894                             CGM.getModule(), OMPRTL___kmpc_flush),
2895                         emitUpdateLocation(CGF, Loc));
2896   }
2897 }
2898 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit values, so each one's value is its
/// position in this list (starting at 0). Reordering or inserting enumerators
/// therefore changes every following index.
// NOTE(review): Data1 and Data2 do not follow the KmpTaskT* naming of the
// other enumerators - presumably they mirror generic data fields of
// kmp_task_t; confirm against the record built for kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2924 
2925 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2926   return OffloadEntriesTargetRegion.empty() &&
2927          OffloadEntriesDeviceGlobalVar.empty();
2928 }
2929 
2930 /// Initialize target region entry.
2931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2932     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2933                                     StringRef ParentName, unsigned LineNum,
2934                                     unsigned Order) {
2935   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2936                                              "only required for the device "
2937                                              "code generation.");
2938   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2939       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2940                                    OMPTargetRegionEntryTargetRegion);
2941   ++OffloadingEntriesNum;
2942 }
2943 
2944 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2945     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2946                                   StringRef ParentName, unsigned LineNum,
2947                                   llvm::Constant *Addr, llvm::Constant *ID,
2948                                   OMPTargetRegionEntryKind Flags) {
2949   // If we are emitting code for a target, the entry is already initialized,
2950   // only has to be registered.
2951   if (CGM.getLangOpts().OpenMPIsDevice) {
2952     // This could happen if the device compilation is invoked standalone.
2953     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2954       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2955                                       OffloadingEntriesNum);
2956     auto &Entry =
2957         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2958     Entry.setAddress(Addr);
2959     Entry.setID(ID);
2960     Entry.setFlags(Flags);
2961   } else {
2962     if (Flags ==
2963             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2964         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2965                                  /*IgnoreAddressId*/ true))
2966       return;
2967     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2968            "Target region entry already registered!");
2969     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2970     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2971     ++OffloadingEntriesNum;
2972   }
2973 }
2974 
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2976     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2977     bool IgnoreAddressId) const {
2978   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2979   if (PerDevice == OffloadEntriesTargetRegion.end())
2980     return false;
2981   auto PerFile = PerDevice->second.find(FileID);
2982   if (PerFile == PerDevice->second.end())
2983     return false;
2984   auto PerParentName = PerFile->second.find(ParentName);
2985   if (PerParentName == PerFile->second.end())
2986     return false;
2987   auto PerLine = PerParentName->second.find(LineNum);
2988   if (PerLine == PerParentName->second.end())
2989     return false;
2990   // Fail if this entry is already registered.
2991   if (!IgnoreAddressId &&
2992       (PerLine->second.getAddress() || PerLine->second.getID()))
2993     return false;
2994   return true;
2995 }
2996 
2997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2998     const OffloadTargetRegionEntryInfoActTy &Action) {
2999   // Scan all target region entries and perform the provided action.
3000   for (const auto &D : OffloadEntriesTargetRegion)
3001     for (const auto &F : D.second)
3002       for (const auto &P : F.second)
3003         for (const auto &L : P.second)
3004           Action(D.first, F.first, P.first(), L.first, L.second);
3005 }
3006 
3007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3008     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3009                                        OMPTargetGlobalVarEntryKind Flags,
3010                                        unsigned Order) {
3011   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3012                                              "only required for the device "
3013                                              "code generation.");
3014   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3015   ++OffloadingEntriesNum;
3016 }
3017 
3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3019     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3020                                      CharUnits VarSize,
3021                                      OMPTargetGlobalVarEntryKind Flags,
3022                                      llvm::GlobalValue::LinkageTypes Linkage) {
3023   if (CGM.getLangOpts().OpenMPIsDevice) {
3024     // This could happen if the device compilation is invoked standalone.
3025     if (!hasDeviceGlobalVarEntryInfo(VarName))
3026       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3027     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3028     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3029            "Resetting with the new address.");
3030     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3031       if (Entry.getVarSize().isZero()) {
3032         Entry.setVarSize(VarSize);
3033         Entry.setLinkage(Linkage);
3034       }
3035       return;
3036     }
3037     Entry.setVarSize(VarSize);
3038     Entry.setLinkage(Linkage);
3039     Entry.setAddress(Addr);
3040   } else {
3041     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3042       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3043       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3044              "Entry not initialized!");
3045       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3046              "Resetting with the new address.");
3047       if (Entry.getVarSize().isZero()) {
3048         Entry.setVarSize(VarSize);
3049         Entry.setLinkage(Linkage);
3050       }
3051       return;
3052     }
3053     OffloadEntriesDeviceGlobalVar.try_emplace(
3054         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3055     ++OffloadingEntriesNum;
3056   }
3057 }
3058 
3059 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3060     actOnDeviceGlobalVarEntriesInfo(
3061         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3062   // Scan all target region entries and perform the provided action.
3063   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3064     Action(E.getKey(), E.getValue());
3065 }
3066 
3067 void CGOpenMPRuntime::createOffloadEntry(
3068     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3069     llvm::GlobalValue::LinkageTypes Linkage) {
3070   StringRef Name = Addr->getName();
3071   llvm::Module &M = CGM.getModule();
3072   llvm::LLVMContext &C = M.getContext();
3073 
3074   // Create constant string with the name.
3075   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3076 
3077   std::string StringName = getName({"omp_offloading", "entry_name"});
3078   auto *Str = new llvm::GlobalVariable(
3079       M, StrPtrInit->getType(), /*isConstant=*/true,
3080       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3081   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3082 
3083   llvm::Constant *Data[] = {
3084       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3085       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3086       llvm::ConstantInt::get(CGM.SizeTy, Size),
3087       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3088       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3089   std::string EntryName = getName({"omp_offloading", "entry", ""});
3090   llvm::GlobalVariable *Entry = createGlobalStruct(
3091       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3092       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3093 
3094   // The entry has to be created in the section the linker expects it to be.
3095   Entry->setSection("omp_offloading_entries");
3096 }
3097 
/// Emit the "omp_offload.info" named metadata and the offload entry globals
/// for everything recorded in OffloadEntriesInfoManager.
///
/// The function works in two phases:
///  1. Every target region entry and every device global variable entry gets
///     one metadata node appended to "omp_offload.info" and is stored in
///     OrderedEntries at the index given by its order.
///  2. OrderedEntries is walked in order and createOffloadEntry() is called
///     for each entry that passes the validity checks; invalid entries are
///     reported through the diagnostics engine and skipped.
///
/// Nothing is emitted in simd-only mode or when no entries were recorded.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated for target region entries and for device global
  // variable entries (see the two emitter lambdas below).

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per recorded entry, indexed by the entry's order. Each slot
  // holds the entry, a source location for diagnostics and a name (parent
  // function name for target regions, mangled name for variables).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name of each target region entry, indexed by order.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Look up the file with the matching device/file ID so that a source
        // location (line `Line`, column 1) can be attached to diagnostics.
        // Loc stays default-constructed if no file matches.
        // Note: the loop-local `E` below is the end iterator and shadows the
        // lambda parameter `E` only inside the for statement.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Variables carry no source location here.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second phase: create the actual offload entries in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are registered with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Inside the switch, `continue` skips to the next ordered entry while
      // `break` falls through to the createOffloadEntry() call below.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // A 'link' entry is expected to have an address on the host and none
        // on the device.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // Only reachable with a null address when assertions are disabled.
        // NOTE(review): unlike the 'to' case, this diagnostic carries neither
        // a source location nor the variable name.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3271 
3272 /// Loads all the offload entries information from the host IR
3273 /// metadata.
3274 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3275   // If we are in target mode, load the metadata from the host IR. This code has
3276   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3277 
3278   if (!CGM.getLangOpts().OpenMPIsDevice)
3279     return;
3280 
3281   if (CGM.getLangOpts().OMPHostIRFile.empty())
3282     return;
3283 
3284   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3285   if (auto EC = Buf.getError()) {
3286     CGM.getDiags().Report(diag::err_cannot_open_file)
3287         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3288     return;
3289   }
3290 
3291   llvm::LLVMContext C;
3292   auto ME = expectedToErrorOrAndEmitErrors(
3293       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3294 
3295   if (auto EC = ME.getError()) {
3296     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3297         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3298     CGM.getDiags().Report(DiagID)
3299         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3300     return;
3301   }
3302 
3303   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3304   if (!MD)
3305     return;
3306 
3307   for (llvm::MDNode *MN : MD->operands()) {
3308     auto &&GetMDInt = [MN](unsigned Idx) {
3309       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3310       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3311     };
3312 
3313     auto &&GetMDString = [MN](unsigned Idx) {
3314       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3315       return V->getString();
3316     };
3317 
3318     switch (GetMDInt(0)) {
3319     default:
3320       llvm_unreachable("Unexpected metadata!");
3321       break;
3322     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3323         OffloadingEntryInfoTargetRegion:
3324       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3325           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3326           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3327           /*Order=*/GetMDInt(5));
3328       break;
3329     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3330         OffloadingEntryInfoDeviceGlobalVar:
3331       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3332           /*MangledName=*/GetMDString(1),
3333           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3334               /*Flags=*/GetMDInt(2)),
3335           /*Order=*/GetMDInt(3));
3336       break;
3337     }
3338   }
3339 }
3340 
3341 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3342   if (!KmpRoutineEntryPtrTy) {
3343     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3344     ASTContext &C = CGM.getContext();
3345     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3346     FunctionProtoType::ExtProtoInfo EPI;
3347     KmpRoutineEntryPtrQTy = C.getPointerType(
3348         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3349     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3350   }
3351 }
3352 
3353 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3354   // Make sure the type of the entry is already created. This is the type we
3355   // have to create:
3356   // struct __tgt_offload_entry{
3357   //   void      *addr;       // Pointer to the offload entry info.
3358   //                          // (function or global)
3359   //   char      *name;       // Name of the function or global.
3360   //   size_t     size;       // Size of the entry info (0 if it a function).
3361   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3362   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3363   // };
3364   if (TgtOffloadEntryQTy.isNull()) {
3365     ASTContext &C = CGM.getContext();
3366     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3367     RD->startDefinition();
3368     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3369     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3370     addFieldToRecordDecl(C, RD, C.getSizeType());
3371     addFieldToRecordDecl(
3372         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3373     addFieldToRecordDecl(
3374         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3375     RD->completeDefinition();
3376     RD->addAttr(PackedAttr::CreateImplicit(C));
3377     TgtOffloadEntryQTy = C.getRecordType(RD);
3378   }
3379   return TgtOffloadEntryQTy;
3380 }
3381 
3382 namespace {
3383 struct PrivateHelpersTy {
3384   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3385                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3386       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3387         PrivateElemInit(PrivateElemInit) {}
3388   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3389   const Expr *OriginalRef = nullptr;
3390   const VarDecl *Original = nullptr;
3391   const VarDecl *PrivateCopy = nullptr;
3392   const VarDecl *PrivateElemInit = nullptr;
3393   bool isLocalPrivate() const {
3394     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3395   }
3396 };
3397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3398 } // anonymous namespace
3399 
3400 static bool isAllocatableDecl(const VarDecl *VD) {
3401   const VarDecl *CVD = VD->getCanonicalDecl();
3402   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3403     return false;
3404   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3405   // Use the default allocation.
3406   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3407             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3408            !AA->getAllocator());
3409 }
3410 
3411 static RecordDecl *
3412 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3413   if (!Privates.empty()) {
3414     ASTContext &C = CGM.getContext();
3415     // Build struct .kmp_privates_t. {
3416     //         /*  private vars  */
3417     //       };
3418     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3419     RD->startDefinition();
3420     for (const auto &Pair : Privates) {
3421       const VarDecl *VD = Pair.second.Original;
3422       QualType Type = VD->getType().getNonReferenceType();
3423       // If the private variable is a local variable with lvalue ref type,
3424       // allocate the pointer instead of the pointee type.
3425       if (Pair.second.isLocalPrivate()) {
3426         if (VD->getType()->isLValueReferenceType())
3427           Type = C.getPointerType(Type);
3428         if (isAllocatableDecl(VD))
3429           Type = C.getPointerType(Type);
3430       }
3431       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3432       if (VD->hasAttrs()) {
3433         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3434              E(VD->getAttrs().end());
3435              I != E; ++I)
3436           FD->addAttr(*I);
3437       }
3438     }
3439     RD->completeDefinition();
3440     return RD;
3441   }
3442   return nullptr;
3443 }
3444 
3445 static RecordDecl *
3446 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3447                          QualType KmpInt32Ty,
3448                          QualType KmpRoutineEntryPointerQTy) {
3449   ASTContext &C = CGM.getContext();
3450   // Build struct kmp_task_t {
3451   //         void *              shareds;
3452   //         kmp_routine_entry_t routine;
3453   //         kmp_int32           part_id;
3454   //         kmp_cmplrdata_t data1;
3455   //         kmp_cmplrdata_t data2;
3456   // For taskloops additional fields:
3457   //         kmp_uint64          lb;
3458   //         kmp_uint64          ub;
3459   //         kmp_int64           st;
3460   //         kmp_int32           liter;
3461   //         void *              reductions;
3462   //       };
3463   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3464   UD->startDefinition();
3465   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3466   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3467   UD->completeDefinition();
3468   QualType KmpCmplrdataTy = C.getRecordType(UD);
3469   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3470   RD->startDefinition();
3471   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3472   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3473   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3474   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3475   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3476   if (isOpenMPTaskLoopDirective(Kind)) {
3477     QualType KmpUInt64Ty =
3478         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3479     QualType KmpInt64Ty =
3480         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3481     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3482     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3483     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3484     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3485     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3486   }
3487   RD->completeDefinition();
3488   return RD;
3489 }
3490 
3491 static RecordDecl *
3492 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3493                                      ArrayRef<PrivateDataTy> Privates) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t_with_privates {
3496   //         kmp_task_t task_data;
3497   //         .kmp_privates_t. privates;
3498   //       };
3499   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3500   RD->startDefinition();
3501   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3502   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3503     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3504   RD->completeDefinition();
3505   return RD;
3506 }
3507 
/// Emit a proxy function which accepts a pointer to the task-with-privates
/// record as the second argument, unpacks its fields and forwards them to
/// \p TaskFunction.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///                task_privates_map, tt,
///   For taskloops:
///                tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///                tt->task_data.liter, tt->task_data.reductions,
///                tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// If the task-with-privates record has no privates field, a null pointer is
/// passed in place of &tt->privates.
///
/// \param Kind Directive kind; taskloop directives get the extra loop
/// arguments.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type whose first field is the task
/// data and whose optional second field holds the privates.
/// \param KmpTaskTQTy Record type of the task data.
/// \param SharedsPtrTy Type the loaded shareds pointer is cast to.
/// \param TaskFunction Outlined function to call.
/// \param TaskPrivatesMap Value forwarded as the privates mapping argument.
/// \return the emitted internal-linkage proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature of the proxy: kmp_int32 (kmp_int32 gtid, <task ptr> restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  // The body is emitted with a fresh CodeGenFunction for the new function.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt, the whole task-with-privates object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the first field of *tt, i.e. the task data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded here but appended as the last call argument.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the optional second field of the wrapper record;
  // pass its address as void*, or null if the record has no such field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is tt itself cast
  // to void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, each
    // loaded by value from the task data.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3622 
3623 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3624                                             SourceLocation Loc,
3625                                             QualType KmpInt32Ty,
3626                                             QualType KmpTaskTWithPrivatesPtrQTy,
3627                                             QualType KmpTaskTWithPrivatesQTy) {
3628   ASTContext &C = CGM.getContext();
3629   FunctionArgList Args;
3630   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3631                             ImplicitParamDecl::Other);
3632   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3633                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3634                                 ImplicitParamDecl::Other);
3635   Args.push_back(&GtidArg);
3636   Args.push_back(&TaskTypeArg);
3637   const auto &DestructorFnInfo =
3638       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3639   llvm::FunctionType *DestructorFnTy =
3640       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3641   std::string Name =
3642       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3643   auto *DestructorFn =
3644       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3645                              Name, &CGM.getModule());
3646   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3647                                     DestructorFnInfo);
3648   DestructorFn->setDoesNotRecurse();
3649   CodeGenFunction CGF(CGM);
3650   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3651                     Args, Loc, Loc);
3652 
3653   LValue Base = CGF.EmitLoadOfPointerLValue(
3654       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3655       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3656   const auto *KmpTaskTWithPrivatesQTyRD =
3657       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3658   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3659   Base = CGF.EmitLValueForField(Base, *FI);
3660   for (const auto *Field :
3661        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3662     if (QualType::DestructionKind DtorKind =
3663             Field->getType().isDestructedType()) {
3664       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3665       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3666     }
3667   }
3668   CGF.FinishFunction();
3669   return DestructorFn;
3670 }
3671 
3672 /// Emit a privates mapping function for correct handling of private and
3673 /// firstprivate variables.
3674 /// \code
3675 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3676 /// **noalias priv1,...,  <tyn> **noalias privn) {
3677 ///   *priv1 = &.privates.priv1;
3678 ///   ...;
3679 ///   *privn = &.privates.privn;
3680 /// }
3681 /// \endcode
3682 static llvm::Value *
3683 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3684                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3685                                ArrayRef<PrivateDataTy> Privates) {
3686   ASTContext &C = CGM.getContext();
3687   FunctionArgList Args;
3688   ImplicitParamDecl TaskPrivatesArg(
3689       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3690       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3691       ImplicitParamDecl::Other);
3692   Args.push_back(&TaskPrivatesArg);
3693   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3694   unsigned Counter = 1;
3695   for (const Expr *E : Data.PrivateVars) {
3696     Args.push_back(ImplicitParamDecl::Create(
3697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3698         C.getPointerType(C.getPointerType(E->getType()))
3699             .withConst()
3700             .withRestrict(),
3701         ImplicitParamDecl::Other));
3702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3703     PrivateVarsPos[VD] = Counter;
3704     ++Counter;
3705   }
3706   for (const Expr *E : Data.FirstprivateVars) {
3707     Args.push_back(ImplicitParamDecl::Create(
3708         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3709         C.getPointerType(C.getPointerType(E->getType()))
3710             .withConst()
3711             .withRestrict(),
3712         ImplicitParamDecl::Other));
3713     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   for (const Expr *E : Data.LastprivateVars) {
3718     Args.push_back(ImplicitParamDecl::Create(
3719         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3720         C.getPointerType(C.getPointerType(E->getType()))
3721             .withConst()
3722             .withRestrict(),
3723         ImplicitParamDecl::Other));
3724     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3725     PrivateVarsPos[VD] = Counter;
3726     ++Counter;
3727   }
3728   for (const VarDecl *VD : Data.PrivateLocals) {
3729     QualType Ty = VD->getType().getNonReferenceType();
3730     if (VD->getType()->isLValueReferenceType())
3731       Ty = C.getPointerType(Ty);
3732     if (isAllocatableDecl(VD))
3733       Ty = C.getPointerType(Ty);
3734     Args.push_back(ImplicitParamDecl::Create(
3735         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3736         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3737         ImplicitParamDecl::Other));
3738     PrivateVarsPos[VD] = Counter;
3739     ++Counter;
3740   }
3741   const auto &TaskPrivatesMapFnInfo =
3742       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3743   llvm::FunctionType *TaskPrivatesMapTy =
3744       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3745   std::string Name =
3746       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3747   auto *TaskPrivatesMap = llvm::Function::Create(
3748       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3749       &CGM.getModule());
3750   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3751                                     TaskPrivatesMapFnInfo);
3752   if (CGM.getLangOpts().Optimize) {
3753     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3754     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3755     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3756   }
3757   CodeGenFunction CGF(CGM);
3758   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3759                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3760 
3761   // *privi = &.privates.privi;
3762   LValue Base = CGF.EmitLoadOfPointerLValue(
3763       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3764       TaskPrivatesArg.getType()->castAs<PointerType>());
3765   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3766   Counter = 0;
3767   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3768     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3769     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3770     LValue RefLVal =
3771         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3772     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3773         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3774     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3775     ++Counter;
3776   }
3777   CGF.FinishFunction();
3778   return TaskPrivatesMap;
3779 }
3780 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// every non-local private copy into its field.
///
/// \param D Directive being emitted; selects the captured statement and
/// whether this is a target task.
/// \param KmpTaskSharedsPtr Address of the shareds block; it is only used to
/// form the source base when the conditions guarding SrcBase below hold.
/// \param TDBase LValue of the task-with-privates object being initialized.
/// \param KmpTaskTWithPrivatesQTyRD Record of the task-with-privates type.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only its firstprivate list is inspected here.
/// \param Privates Descriptions of the privates, in field order.
/// \param ForDup True when emitting into the task duplication function; then
/// only initializers that are non-trivial constructor calls are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI initially designates the privates field of the wrapper record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Taskloop directives use the 'taskloop' captured statement, everything
  // else the 'task' one.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // SrcBase stays default-constructed unless one of the conditions below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself,
  // one field per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      // Still advance FI so that fields and Privates stay in step.
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside the dup function every initializer is emitted; inside it only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // An element-init helper means the private copy is initialized from the
      // original variable, whose lvalue must be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: take the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the original is read from the shareds block,
          // re-wrapped with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable, or code emitted inside a block: emit
          // the original reference expression as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Redirect the element-init helper to the source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: redirect the element-init helper to the original
          // variable and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not depend on the
        // original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3902 
3903 /// Check if duplication function is required for taskloops.
3904 static bool checkInitIsRequired(CodeGenFunction &CGF,
3905                                 ArrayRef<PrivateDataTy> Privates) {
3906   bool InitRequired = false;
3907   for (const PrivateDataTy &Pair : Privates) {
3908     if (Pair.second.isLocalPrivate())
3909       continue;
3910     const VarDecl *VD = Pair.second.PrivateCopy;
3911     const Expr *Init = VD->getAnyInitializer();
3912     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3913                                     !CGF.isTrivialInitializer(Init));
3914     if (InitRequired)
3915       break;
3916   }
3917   return InitRequired;
3918 }
3919 
3920 
3921 /// Emit task_dup function (for initialization of
3922 /// private/firstprivate/lastprivate vars and last_iter flag)
3923 /// \code
3924 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3925 /// lastpriv) {
3926 /// // setup lastprivate flag
3927 ///    task_dst->last = lastpriv;
3928 /// // could be constructor calls here...
3929 /// }
3930 /// \endcode
3931 static llvm::Value *
3932 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3933                     const OMPExecutableDirective &D,
3934                     QualType KmpTaskTWithPrivatesPtrQTy,
3935                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3936                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3937                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3938                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3939   ASTContext &C = CGM.getContext();
3940   FunctionArgList Args;
3941   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3942                            KmpTaskTWithPrivatesPtrQTy,
3943                            ImplicitParamDecl::Other);
3944   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3945                            KmpTaskTWithPrivatesPtrQTy,
3946                            ImplicitParamDecl::Other);
3947   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3948                                 ImplicitParamDecl::Other);
3949   Args.push_back(&DstArg);
3950   Args.push_back(&SrcArg);
3951   Args.push_back(&LastprivArg);
3952   const auto &TaskDupFnInfo =
3953       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3954   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3955   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3956   auto *TaskDup = llvm::Function::Create(
3957       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3958   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3959   TaskDup->setDoesNotRecurse();
3960   CodeGenFunction CGF(CGM);
3961   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3962                     Loc);
3963 
3964   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3965       CGF.GetAddrOfLocalVar(&DstArg),
3966       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3967   // task_dst->liter = lastpriv;
3968   if (WithLastIter) {
3969     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3970     LValue Base = CGF.EmitLValueForField(
3971         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3972     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3973     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3974         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3975     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3976   }
3977 
3978   // Emit initial values for private copies (if any).
3979   assert(!Privates.empty());
3980   Address KmpTaskSharedsPtr = Address::invalid();
3981   if (!Data.FirstprivateVars.empty()) {
3982     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3983         CGF.GetAddrOfLocalVar(&SrcArg),
3984         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3985     LValue Base = CGF.EmitLValueForField(
3986         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3987     KmpTaskSharedsPtr = Address(
3988         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3989                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3990                                                   KmpTaskTShareds)),
3991                              Loc),
3992         CGM.getNaturalTypeAlignment(SharedsTy));
3993   }
3994   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3995                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3996   CGF.FinishFunction();
3997   return TaskDup;
3998 }
3999 
4000 /// Checks if destructor function is required to be generated.
4001 /// \return true if cleanups are required, false otherwise.
4002 static bool
4003 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4004                          ArrayRef<PrivateDataTy> Privates) {
4005   for (const PrivateDataTy &P : Privates) {
4006     if (P.second.isLocalPrivate())
4007       continue;
4008     QualType Ty = P.second.Original->getType().getNonReferenceType();
4009     if (Ty.isDestructedType())
4010       return true;
4011   }
4012   return false;
4013 }
4014 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression \a E (outermost
/// first), a counter initialization, an "iter.cont" block with the loop
/// condition, and an "iter.body" block that starts with the iterator update.
/// Code emitted while the object is alive therefore lands after the update of
/// the last iterator. The destructor closes the loops in reverse order by
/// emitting the counter increment, the branch back to "iter.cont" and the
/// "iter.exit" block.
///
/// If \a E is null, neither the constructor nor the destructor emits anything.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression the loops are generated for; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Jump destinations of the "iter.cont" blocks, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Jump destinations of the "iter.exit" blocks, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \a E into \a CGF.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Upper bounds are evaluated up front, before the private copies of the
    // iterator and counter variables are installed by Privatize().
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Private storage for the iterator variable itself.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // Private storage for the helper counter variable.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember both destinations; the destructor needs them to close the
      // loop.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, last iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the first (outermost) iterator is emitted with
      // IsFinished set.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4093 
4094 static std::pair<llvm::Value *, llvm::Value *>
4095 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4096   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4097   llvm::Value *Addr;
4098   if (OASE) {
4099     const Expr *Base = OASE->getBase();
4100     Addr = CGF.EmitScalarExpr(Base);
4101   } else {
4102     Addr = CGF.EmitLValue(E).getPointer(CGF);
4103   }
4104   llvm::Value *SizeVal;
4105   QualType Ty = E->getType();
4106   if (OASE) {
4107     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4108     for (const Expr *SE : OASE->getDimensions()) {
4109       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4110       Sz = CGF.EmitScalarConversion(
4111           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4112       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4113     }
4114   } else if (const auto *ASE =
4115                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4116     LValue UpAddrLVal =
4117         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4118     llvm::Value *UpAddr =
4119         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4120     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4121     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4122     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4123   } else {
4124     SizeVal = CGF.getTypeSize(Ty);
4125   }
4126   return std::make_pair(Addr, SizeVal);
4127 }
4128 
4129 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4130 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4131   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4132   if (KmpTaskAffinityInfoTy.isNull()) {
4133     RecordDecl *KmpAffinityInfoRD =
4134         C.buildImplicitRecord("kmp_task_affinity_info_t");
4135     KmpAffinityInfoRD->startDefinition();
4136     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4137     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4138     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4139     KmpAffinityInfoRD->completeDefinition();
4140     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4141   }
4142 }
4143 
/// Emits the allocation and initialization of the task descriptor for the
/// directive \a D and returns the pieces callers need to launch the task.
///
/// The steps performed here, in order:
///  - collect private, firstprivate, lastprivate and local private variables
///    and sort them by decreasing alignment;
///  - build (or reuse) the kmp_task_t record and the per-task record that
///    wraps it together with the privates;
///  - emit the privates mapping function (if there are privates) and the
///    proxy task entry;
///  - allocate the task via __kmpc_omp_task_alloc, or via
///    __kmpc_omp_target_task_alloc when \a D has a 'nowait' clause;
///  - process 'detach' and 'affinity' clauses;
///  - copy the shareds, initialize the privates and, for taskloops that need
///    it, emit the task duplication function;
///  - store the destructors function and the priority in the descriptor.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Source location used for the emitted runtime calls.
/// \param D Directive the task is created for.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function.
/// \param SharedsTy Record type of the shared variables.
/// \param Shareds Address of the shareds that are copied into the task.
/// \param Data Task-related data (privates, priority, final, tied, ...).
/// \return The allocated task, the proxy task entry, the task pointer cast to
/// the per-task record type, the lvalue of the embedded kmp_task_t, its record
/// declaration and, if emitted, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the variable used for element
  // initialization.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Decreasing alignment; entries with equal alignment keep their relative
  // order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // The record is cached separately for taskloop directives and for
  // task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates map is passed as the fourth argument of the outlined task
  // function, so its type is taken from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record is used as the privates type.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime value and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, the final flag is selected at run time;
  // otherwise it is the constant taken from the stored boolean.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the task is allocated through
  // __kmpc_omp_target_task_alloc, which receives the device id as an extra
  // trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is the emitted location value for the clause and
    // shadows the SourceLocation parameter inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements stays null unless at least one clause has an iterator
    // modifier; NumAffinities counts the items of clauses without one.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // The element count is only known at run time: emit a variable-length
      // array whose size expression is bound to the computed value.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // No iterators: the element count is a compile-time constant.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // NOTE(review): only the base address and length fields are stored for
    // each entry; the flags field is not written in this function.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops are indexed through a counter
    // kept in memory, starting after the statically placed entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope opens the iterator loops here and closes them when it is
      // destroyed at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the in-memory position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; its first field is the
  // embedded kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the shareds pointer loaded from the task
    // descriptor.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates that require initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The priority is stored through the union in the Data2 field, using the
  // union's 'Priority' member.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4533 
namespace {
/// Dependence kind values as passed to the runtime library in the flags field
/// of a dependence record.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences (both map to this value).
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4
};

/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  /// Base address of the dependence object.
  BaseAddr,
  /// Length of the dependence object.
  Len,
  /// Dependence kind flags.
  Flags
};
} // namespace
4544 
4545 /// Translates internal dependency kind into the runtime kind.
4546 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4547   RTLDependenceKindTy DepKind;
4548   switch (K) {
4549   case OMPC_DEPEND_in:
4550     DepKind = DepIn;
4551     break;
4552   // Out and InOut dependencies must use the same code.
4553   case OMPC_DEPEND_out:
4554   case OMPC_DEPEND_inout:
4555     DepKind = DepInOut;
4556     break;
4557   case OMPC_DEPEND_mutexinoutset:
4558     DepKind = DepMutexInOutSet;
4559     break;
4560   case OMPC_DEPEND_source:
4561   case OMPC_DEPEND_sink:
4562   case OMPC_DEPEND_depobj:
4563   case OMPC_DEPEND_unknown:
4564     llvm_unreachable("Unknown task dependence type");
4565   }
4566   return DepKind;
4567 }
4568 
4569 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4570 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4571                            QualType &FlagsTy) {
4572   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4573   if (KmpDependInfoTy.isNull()) {
4574     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4575     KmpDependInfoRD->startDefinition();
4576     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4577     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4578     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4579     KmpDependInfoRD->completeDefinition();
4580     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4581   }
4582 }
4583 
4584 std::pair<llvm::Value *, LValue>
4585 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4586                                    SourceLocation Loc) {
4587   ASTContext &C = CGM.getContext();
4588   QualType FlagsTy;
4589   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4590   RecordDecl *KmpDependInfoRD =
4591       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4592   LValue Base = CGF.EmitLoadOfPointerLValue(
4593       DepobjLVal.getAddress(CGF),
4594       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4595   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4596   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4597           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4598   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4599                             Base.getTBAAInfo());
4600   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4601       Addr.getPointer(),
4602       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4603   LValue NumDepsBase = CGF.MakeAddrLValue(
4604       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4605       Base.getBaseInfo(), Base.getTBAAInfo());
4606   // NumDeps = deps[i].base_addr;
4607   LValue BaseAddrLVal = CGF.EmitLValueForField(
4608       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4609   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4610   return std::make_pair(NumDeps, Base);
4611 }
4612 
4613 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4614                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4615                            const OMPTaskDataTy::DependData &Data,
4616                            Address DependenciesArray) {
4617   CodeGenModule &CGM = CGF.CGM;
4618   ASTContext &C = CGM.getContext();
4619   QualType FlagsTy;
4620   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4621   RecordDecl *KmpDependInfoRD =
4622       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4623   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4624 
4625   OMPIteratorGeneratorScope IteratorScope(
4626       CGF, cast_or_null<OMPIteratorExpr>(
4627                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4628                                  : nullptr));
4629   for (const Expr *E : Data.DepExprs) {
4630     llvm::Value *Addr;
4631     llvm::Value *Size;
4632     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4633     LValue Base;
4634     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4635       Base = CGF.MakeAddrLValue(
4636           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4637     } else {
4638       LValue &PosLVal = *Pos.get<LValue *>();
4639       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4640       Base = CGF.MakeAddrLValue(
4641           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4642                   DependenciesArray.getAlignment()),
4643           KmpDependInfoTy);
4644     }
4645     // deps[i].base_addr = &<Dependencies[i].second>;
4646     LValue BaseAddrLVal = CGF.EmitLValueForField(
4647         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4648     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4649                           BaseAddrLVal);
4650     // deps[i].len = sizeof(<Dependencies[i].second>);
4651     LValue LenLVal = CGF.EmitLValueForField(
4652         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4653     CGF.EmitStoreOfScalar(Size, LenLVal);
4654     // deps[i].flags = <Dependencies[i].first>;
4655     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4656     LValue FlagsLVal = CGF.EmitLValueForField(
4657         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4658     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4659                           FlagsLVal);
4660     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4661       ++(*P);
4662     } else {
4663       LValue &PosLVal = *Pos.get<LValue *>();
4664       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4665       Idx = CGF.Builder.CreateNUWAdd(Idx,
4666                                      llvm::ConstantInt::get(Idx->getType(), 1));
4667       CGF.EmitStoreOfScalar(Idx, PosLVal);
4668     }
4669   }
4670 }
4671 
4672 static SmallVector<llvm::Value *, 4>
4673 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4674                         const OMPTaskDataTy::DependData &Data) {
4675   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4676          "Expected depobj dependecy kind.");
4677   SmallVector<llvm::Value *, 4> Sizes;
4678   SmallVector<LValue, 4> SizeLVals;
4679   ASTContext &C = CGF.getContext();
4680   QualType FlagsTy;
4681   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4682   RecordDecl *KmpDependInfoRD =
4683       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4684   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4685   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4686   {
4687     OMPIteratorGeneratorScope IteratorScope(
4688         CGF, cast_or_null<OMPIteratorExpr>(
4689                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4690                                    : nullptr));
4691     for (const Expr *E : Data.DepExprs) {
4692       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4693       LValue Base = CGF.EmitLoadOfPointerLValue(
4694           DepobjLVal.getAddress(CGF),
4695           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4696       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4697           Base.getAddress(CGF), KmpDependInfoPtrT);
4698       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4699                                 Base.getTBAAInfo());
4700       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4701           Addr.getPointer(),
4702           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4703       LValue NumDepsBase = CGF.MakeAddrLValue(
4704           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4705           Base.getBaseInfo(), Base.getTBAAInfo());
4706       // NumDeps = deps[i].base_addr;
4707       LValue BaseAddrLVal = CGF.EmitLValueForField(
4708           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4709       llvm::Value *NumDeps =
4710           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4711       LValue NumLVal = CGF.MakeAddrLValue(
4712           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4713           C.getUIntPtrType());
4714       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4715                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4716       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4717       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4718       CGF.EmitStoreOfScalar(Add, NumLVal);
4719       SizeLVals.push_back(NumLVal);
4720     }
4721   }
4722   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4723     llvm::Value *Size =
4724         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4725     Sizes.push_back(Size);
4726   }
4727   return Sizes;
4728 }
4729 
4730 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4731                                LValue PosLVal,
4732                                const OMPTaskDataTy::DependData &Data,
4733                                Address DependenciesArray) {
4734   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4735          "Expected depobj dependecy kind.");
4736   ASTContext &C = CGF.getContext();
4737   QualType FlagsTy;
4738   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4739   RecordDecl *KmpDependInfoRD =
4740       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4741   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4742   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4743   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4744   {
4745     OMPIteratorGeneratorScope IteratorScope(
4746         CGF, cast_or_null<OMPIteratorExpr>(
4747                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4748                                    : nullptr));
4749     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4750       const Expr *E = Data.DepExprs[I];
4751       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4752       LValue Base = CGF.EmitLoadOfPointerLValue(
4753           DepobjLVal.getAddress(CGF),
4754           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4755       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4756           Base.getAddress(CGF), KmpDependInfoPtrT);
4757       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4758                                 Base.getTBAAInfo());
4759 
4760       // Get number of elements in a single depobj.
4761       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4762           Addr.getPointer(),
4763           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4764       LValue NumDepsBase = CGF.MakeAddrLValue(
4765           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4766           Base.getBaseInfo(), Base.getTBAAInfo());
4767       // NumDeps = deps[i].base_addr;
4768       LValue BaseAddrLVal = CGF.EmitLValueForField(
4769           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4770       llvm::Value *NumDeps =
4771           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4772 
4773       // memcopy dependency data.
4774       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4775           ElSize,
4776           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4777       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4778       Address DepAddr =
4779           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4780                   DependenciesArray.getAlignment());
4781       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4782 
4783       // Increase pos.
4784       // pos += size;
4785       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4786       CGF.EmitStoreOfScalar(Add, PosLVal);
4787     }
4788   }
4789 }
4790 
4791 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4792     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4793     SourceLocation Loc) {
4794   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4795         return D.DepExprs.empty();
4796       }))
4797     return std::make_pair(nullptr, Address::invalid());
4798   // Process list of dependencies.
4799   ASTContext &C = CGM.getContext();
4800   Address DependenciesArray = Address::invalid();
4801   llvm::Value *NumOfElements = nullptr;
4802   unsigned NumDependencies = std::accumulate(
4803       Dependencies.begin(), Dependencies.end(), 0,
4804       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4805         return D.DepKind == OMPC_DEPEND_depobj
4806                    ? V
4807                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4808       });
4809   QualType FlagsTy;
4810   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4811   bool HasDepobjDeps = false;
4812   bool HasRegularWithIterators = false;
4813   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4814   llvm::Value *NumOfRegularWithIterators =
4815       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4816   // Calculate number of depobj dependecies and regular deps with the iterators.
4817   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4818     if (D.DepKind == OMPC_DEPEND_depobj) {
4819       SmallVector<llvm::Value *, 4> Sizes =
4820           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4821       for (llvm::Value *Size : Sizes) {
4822         NumOfDepobjElements =
4823             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4824       }
4825       HasDepobjDeps = true;
4826       continue;
4827     }
4828     // Include number of iterations, if any.
4829     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4830       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4831         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4832         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4833         NumOfRegularWithIterators =
4834             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4835       }
4836       HasRegularWithIterators = true;
4837       continue;
4838     }
4839   }
4840 
4841   QualType KmpDependInfoArrayTy;
4842   if (HasDepobjDeps || HasRegularWithIterators) {
4843     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4844                                            /*isSigned=*/false);
4845     if (HasDepobjDeps) {
4846       NumOfElements =
4847           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4848     }
4849     if (HasRegularWithIterators) {
4850       NumOfElements =
4851           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4852     }
4853     OpaqueValueExpr OVE(Loc,
4854                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4855                         VK_RValue);
4856     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4857                                                   RValue::get(NumOfElements));
4858     KmpDependInfoArrayTy =
4859         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4860                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4861     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4862     // Properly emit variable-sized array.
4863     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4864                                          ImplicitParamDecl::Other);
4865     CGF.EmitVarDecl(*PD);
4866     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4867     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4868                                               /*isSigned=*/false);
4869   } else {
4870     KmpDependInfoArrayTy = C.getConstantArrayType(
4871         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4872         ArrayType::Normal, /*IndexTypeQuals=*/0);
4873     DependenciesArray =
4874         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4875     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4876     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4877                                            /*isSigned=*/false);
4878   }
4879   unsigned Pos = 0;
4880   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4881     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4882         Dependencies[I].IteratorExpr)
4883       continue;
4884     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4885                    DependenciesArray);
4886   }
4887   // Copy regular dependecies with iterators.
4888   LValue PosLVal = CGF.MakeAddrLValue(
4889       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4890   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4891   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4892     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4893         !Dependencies[I].IteratorExpr)
4894       continue;
4895     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4896                    DependenciesArray);
4897   }
4898   // Copy final depobj arrays without iterators.
4899   if (HasDepobjDeps) {
4900     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4901       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4902         continue;
4903       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4904                          DependenciesArray);
4905     }
4906   }
4907   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4908       DependenciesArray, CGF.VoidPtrTy);
4909   return std::make_pair(NumOfElements, DependenciesArray);
4910 }
4911 
4912 Address CGOpenMPRuntime::emitDepobjDependClause(
4913     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4914     SourceLocation Loc) {
4915   if (Dependencies.DepExprs.empty())
4916     return Address::invalid();
4917   // Process list of dependencies.
4918   ASTContext &C = CGM.getContext();
4919   Address DependenciesArray = Address::invalid();
4920   unsigned NumDependencies = Dependencies.DepExprs.size();
4921   QualType FlagsTy;
4922   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4923   RecordDecl *KmpDependInfoRD =
4924       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4925 
4926   llvm::Value *Size;
4927   // Define type kmp_depend_info[<Dependencies.size()>];
4928   // For depobj reserve one extra element to store the number of elements.
4929   // It is required to handle depobj(x) update(in) construct.
4930   // kmp_depend_info[<Dependencies.size()>] deps;
4931   llvm::Value *NumDepsVal;
4932   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4933   if (const auto *IE =
4934           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4935     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4936     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4937       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4938       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4939       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4940     }
4941     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4942                                     NumDepsVal);
4943     CharUnits SizeInBytes =
4944         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4945     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4946     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4947     NumDepsVal =
4948         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4949   } else {
4950     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4951         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4952         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4953     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4954     Size = CGM.getSize(Sz.alignTo(Align));
4955     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4956   }
4957   // Need to allocate on the dynamic memory.
4958   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4959   // Use default allocator.
4960   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4961   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4962 
4963   llvm::Value *Addr =
4964       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4965                               CGM.getModule(), OMPRTL___kmpc_alloc),
4966                           Args, ".dep.arr.addr");
4967   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4968       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4969   DependenciesArray = Address(Addr, Align);
4970   // Write number of elements in the first element of array for depobj.
4971   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4972   // deps[i].base_addr = NumDependencies;
4973   LValue BaseAddrLVal = CGF.EmitLValueForField(
4974       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4975   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4976   llvm::PointerUnion<unsigned *, LValue *> Pos;
4977   unsigned Idx = 1;
4978   LValue PosLVal;
4979   if (Dependencies.IteratorExpr) {
4980     PosLVal = CGF.MakeAddrLValue(
4981         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4982         C.getSizeType());
4983     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4984                           /*IsInit=*/true);
4985     Pos = &PosLVal;
4986   } else {
4987     Pos = &Idx;
4988   }
4989   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4990   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4991       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4992   return DependenciesArray;
4993 }
4994 
4995 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4996                                         SourceLocation Loc) {
4997   ASTContext &C = CGM.getContext();
4998   QualType FlagsTy;
4999   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5000   LValue Base = CGF.EmitLoadOfPointerLValue(
5001       DepobjLVal.getAddress(CGF),
5002       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5003   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5004   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5005       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5006   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5007       Addr.getPointer(),
5008       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5009   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5010                                                                CGF.VoidPtrTy);
5011   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5012   // Use default allocator.
5013   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5014   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5015 
5016   // _kmpc_free(gtid, addr, nullptr);
5017   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5018                                 CGM.getModule(), OMPRTL___kmpc_free),
5019                             Args);
5020 }
5021 
5022 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5023                                        OpenMPDependClauseKind NewDepKind,
5024                                        SourceLocation Loc) {
5025   ASTContext &C = CGM.getContext();
5026   QualType FlagsTy;
5027   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5028   RecordDecl *KmpDependInfoRD =
5029       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5030   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5031   llvm::Value *NumDeps;
5032   LValue Base;
5033   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5034 
5035   Address Begin = Base.getAddress(CGF);
5036   // Cast from pointer to array type to pointer to single element.
5037   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5038   // The basic structure here is a while-do loop.
5039   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5040   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5041   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5042   CGF.EmitBlock(BodyBB);
5043   llvm::PHINode *ElementPHI =
5044       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5045   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5046   Begin = Address(ElementPHI, Begin.getAlignment());
5047   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5048                             Base.getTBAAInfo());
5049   // deps[i].flags = NewDepKind;
5050   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5051   LValue FlagsLVal = CGF.EmitLValueForField(
5052       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5053   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5054                         FlagsLVal);
5055 
5056   // Shift the address forward by one element.
5057   Address ElementNext =
5058       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5059   ElementPHI->addIncoming(ElementNext.getPointer(),
5060                           CGF.Builder.GetInsertBlock());
5061   llvm::Value *IsEmpty =
5062       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5063   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5064   // Done.
5065   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5066 }
5067 
5068 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5069                                    const OMPExecutableDirective &D,
5070                                    llvm::Function *TaskFunction,
5071                                    QualType SharedsTy, Address Shareds,
5072                                    const Expr *IfCond,
5073                                    const OMPTaskDataTy &Data) {
5074   if (!CGF.HaveInsertPoint())
5075     return;
5076 
5077   TaskResultTy Result =
5078       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5079   llvm::Value *NewTask = Result.NewTask;
5080   llvm::Function *TaskEntry = Result.TaskEntry;
5081   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5082   LValue TDBase = Result.TDBase;
5083   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5084   // Process list of dependences.
5085   Address DependenciesArray = Address::invalid();
5086   llvm::Value *NumOfElements;
5087   std::tie(NumOfElements, DependenciesArray) =
5088       emitDependClause(CGF, Data.Dependences, Loc);
5089 
5090   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5091   // libcall.
5092   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5093   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5094   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5095   // list is not empty
5096   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5097   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5098   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5099   llvm::Value *DepTaskArgs[7];
5100   if (!Data.Dependences.empty()) {
5101     DepTaskArgs[0] = UpLoc;
5102     DepTaskArgs[1] = ThreadID;
5103     DepTaskArgs[2] = NewTask;
5104     DepTaskArgs[3] = NumOfElements;
5105     DepTaskArgs[4] = DependenciesArray.getPointer();
5106     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5107     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5108   }
5109   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5110                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5111     if (!Data.Tied) {
5112       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5113       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5114       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5115     }
5116     if (!Data.Dependences.empty()) {
5117       CGF.EmitRuntimeCall(
5118           OMPBuilder.getOrCreateRuntimeFunction(
5119               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5120           DepTaskArgs);
5121     } else {
5122       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5123                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5124                           TaskArgs);
5125     }
5126     // Check if parent region is untied and build return for untied task;
5127     if (auto *Region =
5128             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5129       Region->emitUntiedSwitch(CGF);
5130   };
5131 
5132   llvm::Value *DepWaitTaskArgs[6];
5133   if (!Data.Dependences.empty()) {
5134     DepWaitTaskArgs[0] = UpLoc;
5135     DepWaitTaskArgs[1] = ThreadID;
5136     DepWaitTaskArgs[2] = NumOfElements;
5137     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5138     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5139     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5140   }
5141   auto &M = CGM.getModule();
5142   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5143                         TaskEntry, &Data, &DepWaitTaskArgs,
5144                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5145     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5146     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5147     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5148     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5149     // is specified.
5150     if (!Data.Dependences.empty())
5151       CGF.EmitRuntimeCall(
5152           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5153           DepWaitTaskArgs);
5154     // Call proxy_task_entry(gtid, new_task);
5155     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5156                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5157       Action.Enter(CGF);
5158       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5159       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5160                                                           OutlinedFnArgs);
5161     };
5162 
5163     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5164     // kmp_task_t *new_task);
5165     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5166     // kmp_task_t *new_task);
5167     RegionCodeGenTy RCG(CodeGen);
5168     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5169                               M, OMPRTL___kmpc_omp_task_begin_if0),
5170                           TaskArgs,
5171                           OMPBuilder.getOrCreateRuntimeFunction(
5172                               M, OMPRTL___kmpc_omp_task_complete_if0),
5173                           TaskArgs);
5174     RCG.setAction(Action);
5175     RCG(CGF);
5176   };
5177 
5178   if (IfCond) {
5179     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5180   } else {
5181     RegionCodeGenTy ThenRCG(ThenCodeGen);
5182     ThenRCG(CGF);
5183   }
5184 }
5185 
5186 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5187                                        const OMPLoopDirective &D,
5188                                        llvm::Function *TaskFunction,
5189                                        QualType SharedsTy, Address Shareds,
5190                                        const Expr *IfCond,
5191                                        const OMPTaskDataTy &Data) {
5192   if (!CGF.HaveInsertPoint())
5193     return;
5194   TaskResultTy Result =
5195       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5196   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5197   // libcall.
5198   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5199   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5200   // sched, kmp_uint64 grainsize, void *task_dup);
5201   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5202   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5203   llvm::Value *IfVal;
5204   if (IfCond) {
5205     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5206                                       /*isSigned=*/true);
5207   } else {
5208     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5209   }
5210 
5211   LValue LBLVal = CGF.EmitLValueForField(
5212       Result.TDBase,
5213       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5214   const auto *LBVar =
5215       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5216   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5217                        LBLVal.getQuals(),
5218                        /*IsInitializer=*/true);
5219   LValue UBLVal = CGF.EmitLValueForField(
5220       Result.TDBase,
5221       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5222   const auto *UBVar =
5223       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5224   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5225                        UBLVal.getQuals(),
5226                        /*IsInitializer=*/true);
5227   LValue StLVal = CGF.EmitLValueForField(
5228       Result.TDBase,
5229       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5230   const auto *StVar =
5231       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5232   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5233                        StLVal.getQuals(),
5234                        /*IsInitializer=*/true);
5235   // Store reductions address.
5236   LValue RedLVal = CGF.EmitLValueForField(
5237       Result.TDBase,
5238       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5239   if (Data.Reductions) {
5240     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5241   } else {
5242     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5243                                CGF.getContext().VoidPtrTy);
5244   }
5245   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5246   llvm::Value *TaskArgs[] = {
5247       UpLoc,
5248       ThreadID,
5249       Result.NewTask,
5250       IfVal,
5251       LBLVal.getPointer(CGF),
5252       UBLVal.getPointer(CGF),
5253       CGF.EmitLoadOfScalar(StLVal, Loc),
5254       llvm::ConstantInt::getSigned(
5255           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5256       llvm::ConstantInt::getSigned(
5257           CGF.IntTy, Data.Schedule.getPointer()
5258                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5259                          : NoSchedule),
5260       Data.Schedule.getPointer()
5261           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5262                                       /*isSigned=*/false)
5263           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5264       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5265                              Result.TaskDupFn, CGF.VoidPtrTy)
5266                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5267   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5268                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5269                       TaskArgs);
5270 }
5271 
5272 /// Emit reduction operation for each element of array (required for
5273 /// array sections) LHS op = RHS.
5274 /// \param Type Type of array.
5275 /// \param LHSVar Variable on the left side of the reduction operation
5276 /// (references element of array in original variable).
5277 /// \param RHSVar Variable on the right side of the reduction operation
5278 /// (references element of array in original variable).
5279 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5280 /// RHSVar.
5281 static void EmitOMPAggregateReduction(
5282     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5283     const VarDecl *RHSVar,
5284     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5285                                   const Expr *, const Expr *)> &RedOpGen,
5286     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5287     const Expr *UpExpr = nullptr) {
5288   // Perform element-by-element initialization.
5289   QualType ElementTy;
5290   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5291   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5292 
5293   // Drill down to the base element type on both arrays.
5294   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5295   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5296 
5297   llvm::Value *RHSBegin = RHSAddr.getPointer();
5298   llvm::Value *LHSBegin = LHSAddr.getPointer();
5299   // Cast from pointer to array type to pointer to single element.
5300   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5301   // The basic structure here is a while-do loop.
5302   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5303   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5304   llvm::Value *IsEmpty =
5305       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5306   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5307 
5308   // Enter the loop body, making that address the current address.
5309   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5310   CGF.EmitBlock(BodyBB);
5311 
5312   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5313 
5314   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5315       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5316   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5317   Address RHSElementCurrent =
5318       Address(RHSElementPHI,
5319               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5320 
5321   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5322       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5323   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5324   Address LHSElementCurrent =
5325       Address(LHSElementPHI,
5326               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5327 
5328   // Emit copy.
5329   CodeGenFunction::OMPPrivateScope Scope(CGF);
5330   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5331   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5332   Scope.Privatize();
5333   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5334   Scope.ForceCleanup();
5335 
5336   // Shift the address forward by one element.
5337   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5338       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5339   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5340       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5341   // Check whether we've reached the end.
5342   llvm::Value *Done =
5343       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5344   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5345   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5346   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5347 
5348   // Done.
5349   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5350 }
5351 
5352 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5353 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5354 /// UDR combiner function.
5355 static void emitReductionCombiner(CodeGenFunction &CGF,
5356                                   const Expr *ReductionOp) {
5357   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5358     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5359       if (const auto *DRE =
5360               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5361         if (const auto *DRD =
5362                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5363           std::pair<llvm::Function *, llvm::Function *> Reduction =
5364               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5365           RValue Func = RValue::get(Reduction.first);
5366           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5367           CGF.EmitIgnoredExpr(ReductionOp);
5368           return;
5369         }
5370   CGF.EmitIgnoredExpr(ReductionOp);
5371 }
5372 
5373 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5374     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5375     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5376     ArrayRef<const Expr *> ReductionOps) {
5377   ASTContext &C = CGM.getContext();
5378 
5379   // void reduction_func(void *LHSArg, void *RHSArg);
5380   FunctionArgList Args;
5381   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5382                            ImplicitParamDecl::Other);
5383   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5384                            ImplicitParamDecl::Other);
5385   Args.push_back(&LHSArg);
5386   Args.push_back(&RHSArg);
5387   const auto &CGFI =
5388       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5389   std::string Name = getName({"omp", "reduction", "reduction_func"});
5390   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5391                                     llvm::GlobalValue::InternalLinkage, Name,
5392                                     &CGM.getModule());
5393   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5394   Fn->setDoesNotRecurse();
5395   CodeGenFunction CGF(CGM);
5396   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5397 
5398   // Dst = (void*[n])(LHSArg);
5399   // Src = (void*[n])(RHSArg);
5400   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5401       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5402       ArgsType), CGF.getPointerAlign());
5403   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5404       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5405       ArgsType), CGF.getPointerAlign());
5406 
5407   //  ...
5408   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5409   //  ...
5410   CodeGenFunction::OMPPrivateScope Scope(CGF);
5411   auto IPriv = Privates.begin();
5412   unsigned Idx = 0;
5413   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5414     const auto *RHSVar =
5415         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5416     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5417       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5418     });
5419     const auto *LHSVar =
5420         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5421     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5422       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5423     });
5424     QualType PrivTy = (*IPriv)->getType();
5425     if (PrivTy->isVariablyModifiedType()) {
5426       // Get array size and emit VLA type.
5427       ++Idx;
5428       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5429       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5430       const VariableArrayType *VLA =
5431           CGF.getContext().getAsVariableArrayType(PrivTy);
5432       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5433       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5434           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5435       CGF.EmitVariablyModifiedType(PrivTy);
5436     }
5437   }
5438   Scope.Privatize();
5439   IPriv = Privates.begin();
5440   auto ILHS = LHSExprs.begin();
5441   auto IRHS = RHSExprs.begin();
5442   for (const Expr *E : ReductionOps) {
5443     if ((*IPriv)->getType()->isArrayType()) {
5444       // Emit reduction for array section.
5445       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5446       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5447       EmitOMPAggregateReduction(
5448           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5449           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5450             emitReductionCombiner(CGF, E);
5451           });
5452     } else {
5453       // Emit reduction for array subscript or single variable.
5454       emitReductionCombiner(CGF, E);
5455     }
5456     ++IPriv;
5457     ++ILHS;
5458     ++IRHS;
5459   }
5460   Scope.ForceCleanup();
5461   CGF.FinishFunction();
5462   return Fn;
5463 }
5464 
5465 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5466                                                   const Expr *ReductionOp,
5467                                                   const Expr *PrivateRef,
5468                                                   const DeclRefExpr *LHS,
5469                                                   const DeclRefExpr *RHS) {
5470   if (PrivateRef->getType()->isArrayType()) {
5471     // Emit reduction for array section.
5472     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5473     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5474     EmitOMPAggregateReduction(
5475         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5476         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5477           emitReductionCombiner(CGF, ReductionOp);
5478         });
5479   } else {
5480     // Emit reduction for array subscript or single variable.
5481     emitReductionCombiner(CGF, ReductionOp);
5482   }
5483 }
5484 
/// Emit the code that combines the private reduction copies into the original
/// reduction items.
///
/// Does nothing when \p CGF has no insertion point. When
/// Options.SimpleReduction is set, only the combiners are emitted inline.
/// Otherwise a reduction list is built, an outlined reduction function is
/// created, and a call to __kmpc_reduce / __kmpc_reduce_nowait (selected by
/// Options.WithNowait) is emitted, followed by a switch on its result with a
/// direct-combine case (1) and an atomic/critical case (2).
///
/// \param Privates Private copies of the reduction items; their types decide
/// whether an item is treated as an array and whether an extra list slot is
/// reserved for a variably modified type.
/// \param LHSExprs References to the helper LHS variables (each is cast to
/// DeclRefExpr below).
/// \param RHSExprs References to the helper RHS variables (each is cast to
/// DeclRefExpr below); their addresses populate the reduction list.
/// \param ReductionOps Combiner expressions, one per reduction item.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls at all: just emit every combiner in place, inside its
    // own cleanups scope.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Size counts list slots: one per reduction item plus one extra slot for
  // every item whose private copy has a variably modified type.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // I indexes reduction items, Idx indexes list slots; they diverge after the
  // first variably modified item.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // NOTE: this llvm::Value 'Size' shadows the slot counter 'Size' above.
      // The element count is stored in the list as an integer converted to a
      // 'void *'.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note that <n> is the number of reduction items (RHSExprs.size()), not the
  // number of list slots; the slot count is only reflected in sizeof(RedList).
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 goes straight to the default block, which is
  // also the join point of both cases.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: emit each combiner directly, non-atomically.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action receives no first callee/arguments (nullptr, llvm::None) and
  // the __kmpc_end_reduce{_nowait} entry point with EndArgs; per the
  // pseudo-code above that call is expected after the combiners.
  // NOTE(review): exact enter/exit semantics live in CommonActionTy - confirm
  // there.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2: each combiner is emitted either as an atomic
  // update (when it has the form 'x = <update>') or inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr: the updated lvalue, UpExpr: the full update expression,
      // EExpr: the right operand of the update's binary operator (if any).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer 'BO' declared in the condition shadows the
      // BinaryOperatorKind 'BO' above for the extent of this 'if' only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Per-item (or per-element) generator for the atomic form. The
        // callback passed to EmitOMPAtomicSimpleUpdateExpr stores the value it
        // is given into a temporary, remaps VD to that temporary and
        // re-evaluates UpExpr against it.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The combiner is not a plain assignment, so it is wrapped in a
        // critical region whose name is derived from "atomic_reduction".
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the non-nowait form attaches __kmpc_end_reduce to case 2. The region
  // is invoked inside the branch so that the local Action object is still
  // alive while the region is emitted.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5791 
5792 /// Generates unique name for artificial threadprivate variables.
5793 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5794 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5795                                       const Expr *Ref) {
5796   SmallString<256> Buffer;
5797   llvm::raw_svector_ostream Out(Buffer);
5798   const clang::DeclRefExpr *DE;
5799   const VarDecl *D = ::getBaseDecl(Ref, DE);
5800   if (!D)
5801     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5802   D = D->getCanonicalDecl();
5803   std::string Name = CGM.getOpenMPRuntime().getName(
5804       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5805   Out << Prefix << Name << "_"
5806       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5807   return std::string(Out.str());
5808 }
5809 
5810 /// Emits reduction initializer function:
5811 /// \code
5812 /// void @.red_init(void* %arg, void* %orig) {
5813 /// %0 = bitcast void* %arg to <type>*
5814 /// store <type> <init>, <type>* %0
5815 /// ret void
5816 /// }
5817 /// \endcode
5818 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5819                                            SourceLocation Loc,
5820                                            ReductionCodeGen &RCG, unsigned N) {
5821   ASTContext &C = CGM.getContext();
5822   QualType VoidPtrTy = C.VoidPtrTy;
5823   VoidPtrTy.addRestrict();
5824   FunctionArgList Args;
5825   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5826                           ImplicitParamDecl::Other);
5827   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5828                               ImplicitParamDecl::Other);
5829   Args.emplace_back(&Param);
5830   Args.emplace_back(&ParamOrig);
5831   const auto &FnInfo =
5832       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5833   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5834   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5835   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5836                                     Name, &CGM.getModule());
5837   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5838   Fn->setDoesNotRecurse();
5839   CodeGenFunction CGF(CGM);
5840   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5841   Address PrivateAddr = CGF.EmitLoadOfPointer(
5842       CGF.GetAddrOfLocalVar(&Param),
5843       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5844   llvm::Value *Size = nullptr;
5845   // If the size of the reduction item is non-constant, load it from global
5846   // threadprivate variable.
5847   if (RCG.getSizes(N).second) {
5848     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5849         CGF, CGM.getContext().getSizeType(),
5850         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5851     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5852                                 CGM.getContext().getSizeType(), Loc);
5853   }
5854   RCG.emitAggregateType(CGF, N, Size);
5855   LValue OrigLVal;
5856   // If initializer uses initializer from declare reduction construct, emit a
5857   // pointer to the address of the original reduction item (reuired by reduction
5858   // initializer)
5859   if (RCG.usesReductionInitializer(N)) {
5860     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5861     SharedAddr = CGF.EmitLoadOfPointer(
5862         SharedAddr,
5863         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5864     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5865   } else {
5866     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5867         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5868         CGM.getContext().VoidPtrTy);
5869   }
5870   // Emit the initializer:
5871   // %0 = bitcast void* %arg to <type>*
5872   // store <type> <init>, <type>* %0
5873   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5874                          [](CodeGenFunction &) { return false; });
5875   CGF.FinishFunction();
5876   return Fn;
5877 }
5878 
5879 /// Emits reduction combiner function:
5880 /// \code
5881 /// void @.red_comb(void* %arg0, void* %arg1) {
5882 /// %lhs = bitcast void* %arg0 to <type>*
5883 /// %rhs = bitcast void* %arg1 to <type>*
5884 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5885 /// store <type> %2, <type>* %lhs
5886 /// ret void
5887 /// }
5888 /// \endcode
5889 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5890                                            SourceLocation Loc,
5891                                            ReductionCodeGen &RCG, unsigned N,
5892                                            const Expr *ReductionOp,
5893                                            const Expr *LHS, const Expr *RHS,
5894                                            const Expr *PrivateRef) {
5895   ASTContext &C = CGM.getContext();
5896   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5897   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5898   FunctionArgList Args;
5899   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5900                                C.VoidPtrTy, ImplicitParamDecl::Other);
5901   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5902                             ImplicitParamDecl::Other);
5903   Args.emplace_back(&ParamInOut);
5904   Args.emplace_back(&ParamIn);
5905   const auto &FnInfo =
5906       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5907   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5908   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5909   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5910                                     Name, &CGM.getModule());
5911   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5912   Fn->setDoesNotRecurse();
5913   CodeGenFunction CGF(CGM);
5914   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5915   llvm::Value *Size = nullptr;
5916   // If the size of the reduction item is non-constant, load it from global
5917   // threadprivate variable.
5918   if (RCG.getSizes(N).second) {
5919     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5920         CGF, CGM.getContext().getSizeType(),
5921         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5922     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5923                                 CGM.getContext().getSizeType(), Loc);
5924   }
5925   RCG.emitAggregateType(CGF, N, Size);
5926   // Remap lhs and rhs variables to the addresses of the function arguments.
5927   // %lhs = bitcast void* %arg0 to <type>*
5928   // %rhs = bitcast void* %arg1 to <type>*
5929   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5930   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5931     // Pull out the pointer to the variable.
5932     Address PtrAddr = CGF.EmitLoadOfPointer(
5933         CGF.GetAddrOfLocalVar(&ParamInOut),
5934         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5935     return CGF.Builder.CreateElementBitCast(
5936         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5937   });
5938   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5939     // Pull out the pointer to the variable.
5940     Address PtrAddr = CGF.EmitLoadOfPointer(
5941         CGF.GetAddrOfLocalVar(&ParamIn),
5942         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5943     return CGF.Builder.CreateElementBitCast(
5944         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5945   });
5946   PrivateScope.Privatize();
5947   // Emit the combiner body:
5948   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5949   // store <type> %2, <type>* %lhs
5950   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5951       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5952       cast<DeclRefExpr>(RHS));
5953   CGF.FinishFunction();
5954   return Fn;
5955 }
5956 
5957 /// Emits reduction finalizer function:
5958 /// \code
5959 /// void @.red_fini(void* %arg) {
5960 /// %0 = bitcast void* %arg to <type>*
5961 /// <destroy>(<type>* %0)
5962 /// ret void
5963 /// }
5964 /// \endcode
5965 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5966                                            SourceLocation Loc,
5967                                            ReductionCodeGen &RCG, unsigned N) {
5968   if (!RCG.needCleanups(N))
5969     return nullptr;
5970   ASTContext &C = CGM.getContext();
5971   FunctionArgList Args;
5972   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5973                           ImplicitParamDecl::Other);
5974   Args.emplace_back(&Param);
5975   const auto &FnInfo =
5976       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5977   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5978   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5979   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5980                                     Name, &CGM.getModule());
5981   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5982   Fn->setDoesNotRecurse();
5983   CodeGenFunction CGF(CGM);
5984   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5985   Address PrivateAddr = CGF.EmitLoadOfPointer(
5986       CGF.GetAddrOfLocalVar(&Param),
5987       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5988   llvm::Value *Size = nullptr;
5989   // If the size of the reduction item is non-constant, load it from global
5990   // threadprivate variable.
5991   if (RCG.getSizes(N).second) {
5992     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5993         CGF, CGM.getContext().getSizeType(),
5994         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5995     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5996                                 CGM.getContext().getSizeType(), Loc);
5997   }
5998   RCG.emitAggregateType(CGF, N, Size);
5999   // Emit the finalizer body:
6000   // <destroy>(<type>* %0)
6001   RCG.emitCleanups(CGF, N, PrivateAddr);
6002   CGF.FinishFunction(Loc);
6003   return Fn;
6004 }
6005 
/// Emits the task reduction descriptor array (one kmp_taskred_input_t entry
/// per item in Data.ReductionVars) and the runtime call that registers it.
///
/// Returns nullptr when there is no insert point or there are no reduction
/// variables. Otherwise returns the result of __kmpc_taskred_modifier_init
/// (when Data.IsReductionWithTaskMod is set) or of __kmpc_taskred_init.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // Fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = (size_t)SizeValInChars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null finalizer function is stored as a null pointer.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6134 
6135 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6136                                             SourceLocation Loc,
6137                                             bool IsWorksharingReduction) {
6138   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6139   // is_ws, int num, void *data);
6140   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6141   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6142                                                 CGM.IntTy, /*isSigned=*/true);
6143   llvm::Value *Args[] = {IdentTLoc, GTid,
6144                          llvm::ConstantInt::get(CGM.IntTy,
6145                                                 IsWorksharingReduction ? 1 : 0,
6146                                                 /*isSigned=*/true)};
6147   (void)CGF.EmitRuntimeCall(
6148       OMPBuilder.getOrCreateRuntimeFunction(
6149           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6150       Args);
6151 }
6152 
6153 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6154                                               SourceLocation Loc,
6155                                               ReductionCodeGen &RCG,
6156                                               unsigned N) {
6157   auto Sizes = RCG.getSizes(N);
6158   // Emit threadprivate global variable if the type is non-constant
6159   // (Sizes.second = nullptr).
6160   if (Sizes.second) {
6161     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6162                                                      /*isSigned=*/false);
6163     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6164         CGF, CGM.getContext().getSizeType(),
6165         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6166     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6167   }
6168 }
6169 
6170 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6171                                               SourceLocation Loc,
6172                                               llvm::Value *ReductionsPtr,
6173                                               LValue SharedLVal) {
6174   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6175   // *d);
6176   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6177                                                    CGM.IntTy,
6178                                                    /*isSigned=*/true),
6179                          ReductionsPtr,
6180                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6181                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6182   return Address(
6183       CGF.EmitRuntimeCall(
6184           OMPBuilder.getOrCreateRuntimeFunction(
6185               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6186           Args),
6187       SharedLVal.getAlignment());
6188 }
6189 
6190 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6191                                        SourceLocation Loc) {
6192   if (!CGF.HaveInsertPoint())
6193     return;
6194 
6195   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6196     OMPBuilder.createTaskwait(CGF.Builder);
6197   } else {
6198     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6199     // global_tid);
6200     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6201     // Ignore return result until untied tasks are supported.
6202     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6203                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6204                         Args);
6205   }
6206 
6207   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6208     Region->emitUntiedSwitch(CGF);
6209 }
6210 
6211 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6212                                            OpenMPDirectiveKind InnerKind,
6213                                            const RegionCodeGenTy &CodeGen,
6214                                            bool HasCancel) {
6215   if (!CGF.HaveInsertPoint())
6216     return;
6217   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6218   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6219 }
6220 
namespace {
/// Integer codes for the kind of region being cancelled. The value produced
/// by getCancellationKind() is passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // Initial value; no region kind selected.
  CancelParallel = 1,  // Selected for OMPD_parallel.
  CancelLoop = 2,      // Selected for OMPD_for.
  CancelSections = 3,  // Selected for OMPD_sections.
  CancelTaskgroup = 4  // Selected for OMPD_taskgroup.
};
} // anonymous namespace
6230 
6231 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6232   RTCancelKind CancelKind = CancelNoreq;
6233   if (CancelRegion == OMPD_parallel)
6234     CancelKind = CancelParallel;
6235   else if (CancelRegion == OMPD_for)
6236     CancelKind = CancelLoop;
6237   else if (CancelRegion == OMPD_sections)
6238     CancelKind = CancelSections;
6239   else {
6240     assert(CancelRegion == OMPD_taskgroup);
6241     CancelKind = CancelTaskgroup;
6242   }
6243   return CancelKind;
6244 }
6245 
6246 void CGOpenMPRuntime::emitCancellationPointCall(
6247     CodeGenFunction &CGF, SourceLocation Loc,
6248     OpenMPDirectiveKind CancelRegion) {
6249   if (!CGF.HaveInsertPoint())
6250     return;
6251   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6252   // global_tid, kmp_int32 cncl_kind);
6253   if (auto *OMPRegionInfo =
6254           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6255     // For 'cancellation point taskgroup', the task region info may not have a
6256     // cancel. This may instead happen in another adjacent task.
6257     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6258       llvm::Value *Args[] = {
6259           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6260           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6261       // Ignore return result until untied tasks are supported.
6262       llvm::Value *Result = CGF.EmitRuntimeCall(
6263           OMPBuilder.getOrCreateRuntimeFunction(
6264               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6265           Args);
6266       // if (__kmpc_cancellationpoint()) {
6267       //   exit from construct;
6268       // }
6269       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6270       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6271       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6272       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6273       CGF.EmitBlock(ExitBB);
6274       // exit from construct;
6275       CodeGenFunction::JumpDest CancelDest =
6276           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6277       CGF.EmitBranchThroughCleanup(CancelDest);
6278       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6279     }
6280   }
6281 }
6282 
6283 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6284                                      const Expr *IfCond,
6285                                      OpenMPDirectiveKind CancelRegion) {
6286   if (!CGF.HaveInsertPoint())
6287     return;
6288   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6289   // kmp_int32 cncl_kind);
6290   auto &M = CGM.getModule();
6291   if (auto *OMPRegionInfo =
6292           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6293     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6294                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6295       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6296       llvm::Value *Args[] = {
6297           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6298           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6299       // Ignore return result until untied tasks are supported.
6300       llvm::Value *Result = CGF.EmitRuntimeCall(
6301           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6302       // if (__kmpc_cancel()) {
6303       //   exit from construct;
6304       // }
6305       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6306       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6307       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6308       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6309       CGF.EmitBlock(ExitBB);
6310       // exit from construct;
6311       CodeGenFunction::JumpDest CancelDest =
6312           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6313       CGF.EmitBranchThroughCleanup(CancelDest);
6314       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6315     };
6316     if (IfCond) {
6317       emitIfClause(CGF, IfCond, ThenGen,
6318                    [](CodeGenFunction &, PrePostActionTy &) {});
6319     } else {
6320       RegionCodeGenTy ThenRCG(ThenGen);
6321       ThenRCG(CGF);
6322     }
6323   }
6324 }
6325 
6326 namespace {
6327 /// Cleanup action for uses_allocators support.
6328 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6329   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6330 
6331 public:
6332   OMPUsesAllocatorsActionTy(
6333       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6334       : Allocators(Allocators) {}
6335   void Enter(CodeGenFunction &CGF) override {
6336     if (!CGF.HaveInsertPoint())
6337       return;
6338     for (const auto &AllocatorData : Allocators) {
6339       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6340           CGF, AllocatorData.first, AllocatorData.second);
6341     }
6342   }
6343   void Exit(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6348                                                         AllocatorData.first);
6349     }
6350   }
6351 };
6352 } // namespace
6353 
6354 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6355     const OMPExecutableDirective &D, StringRef ParentName,
6356     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6357     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6358   assert(!ParentName.empty() && "Invalid target region parent name!");
6359   HasEmittedTargetRegion = true;
6360   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6361   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6362     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6363       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6364       if (!D.AllocatorTraits)
6365         continue;
6366       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6367     }
6368   }
6369   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6370   CodeGen.setAction(UsesAllocatorAction);
6371   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6372                                    IsOffloadEntry, CodeGen);
6373 }
6374 
6375 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6376                                              const Expr *Allocator,
6377                                              const Expr *AllocatorTraits) {
6378   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6379   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6380   // Use default memspace handle.
6381   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6382   llvm::Value *NumTraits = llvm::ConstantInt::get(
6383       CGF.IntTy, cast<ConstantArrayType>(
6384                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6385                      ->getSize()
6386                      .getLimitedValue());
6387   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6388   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6389       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6390   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6391                                            AllocatorTraitsLVal.getBaseInfo(),
6392                                            AllocatorTraitsLVal.getTBAAInfo());
6393   llvm::Value *Traits =
6394       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6395 
6396   llvm::Value *AllocatorVal =
6397       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6398                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6399                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6400   // Store to allocator.
6401   CGF.EmitVarDecl(*cast<VarDecl>(
6402       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6403   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6404   AllocatorVal =
6405       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6406                                Allocator->getType(), Allocator->getExprLoc());
6407   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6408 }
6409 
6410 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6411                                              const Expr *Allocator) {
6412   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6413   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6414   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6415   llvm::Value *AllocatorVal =
6416       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6417   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6418                                           CGF.getContext().VoidPtrTy,
6419                                           Allocator->getExprLoc());
6420   (void)CGF.EmitRuntimeCall(
6421       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6422                                             OMPRTL___kmpc_destroy_allocator),
6423       {ThreadId, AllocatorVal});
6424 }
6425 
6426 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6427     const OMPExecutableDirective &D, StringRef ParentName,
6428     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6429     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6430   // Create a unique name for the entry function using the source location
6431   // information of the current target region. The name will be something like:
6432   //
6433   // __omp_offloading_DD_FFFF_PP_lBB
6434   //
6435   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6436   // mangled name of the function that encloses the target region and BB is the
6437   // line number of the target region.
6438 
6439   unsigned DeviceID;
6440   unsigned FileID;
6441   unsigned Line;
6442   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6443                            Line);
6444   SmallString<64> EntryFnName;
6445   {
6446     llvm::raw_svector_ostream OS(EntryFnName);
6447     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6448        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6449   }
6450 
6451   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6452 
6453   CodeGenFunction CGF(CGM, true);
6454   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6455   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6456 
6457   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6458 
6459   // If this target outline function is not an offload entry, we don't need to
6460   // register it.
6461   if (!IsOffloadEntry)
6462     return;
6463 
6464   // The target region ID is used by the runtime library to identify the current
6465   // target region, so it only has to be unique and not necessarily point to
6466   // anything. It could be the pointer to the outlined function that implements
6467   // the target region, but we aren't using that so that the compiler doesn't
6468   // need to keep that, and could therefore inline the host function if proven
6469   // worthwhile during optimization. In the other hand, if emitting code for the
6470   // device, the ID has to be the function address so that it can retrieved from
6471   // the offloading entry and launched by the runtime library. We also mark the
6472   // outlined function to have external linkage in case we are emitting code for
6473   // the device, because these functions will be entry points to the device.
6474 
6475   if (CGM.getLangOpts().OpenMPIsDevice) {
6476     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6477     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6478     OutlinedFn->setDSOLocal(false);
6479     if (CGM.getTriple().isAMDGCN())
6480       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6481   } else {
6482     std::string Name = getName({EntryFnName, "region_id"});
6483     OutlinedFnID = new llvm::GlobalVariable(
6484         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6485         llvm::GlobalValue::WeakAnyLinkage,
6486         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6487   }
6488 
6489   // Register the information for the entry associated with this target region.
6490   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6491       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6492       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6493 }
6494 
6495 /// Checks if the expression is constant or does not have non-trivial function
6496 /// calls.
6497 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6498   // We can skip constant expressions.
6499   // We can skip expressions with trivial calls or simple expressions.
6500   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6501           !E->hasNonTrivialCall(Ctx)) &&
6502          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6503 }
6504 
6505 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6506                                                     const Stmt *Body) {
6507   const Stmt *Child = Body->IgnoreContainers();
6508   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6509     Child = nullptr;
6510     for (const Stmt *S : C->body()) {
6511       if (const auto *E = dyn_cast<Expr>(S)) {
6512         if (isTrivial(Ctx, E))
6513           continue;
6514       }
6515       // Some of the statements can be ignored.
6516       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6517           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6518         continue;
6519       // Analyze declarations.
6520       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6521         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6522               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6523                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6524                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6525                   isa<UsingDirectiveDecl>(D) ||
6526                   isa<OMPDeclareReductionDecl>(D) ||
6527                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6528                 return true;
6529               const auto *VD = dyn_cast<VarDecl>(D);
6530               if (!VD)
6531                 return false;
6532               return VD->isConstexpr() ||
6533                      ((VD->getType().isTrivialType(Ctx) ||
6534                        VD->getType()->isReferenceType()) &&
6535                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6536             }))
6537           continue;
6538       }
6539       // Found multiple children - cannot get the one child only.
6540       if (Child)
6541         return nullptr;
6542       Child = S;
6543     }
6544     if (Child)
6545       Child = Child->IgnoreContainers();
6546   }
6547   return Child;
6548 }
6549 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Must only be called when generating host code and only for target
/// execution directives (both are asserted). A returned constant 0 is used
/// when a teams construct is present without a num_teams clause, and when the
/// single nested directive is neither a teams, parallel nor simd directive.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single nested directive in the region body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression with the captured
          // statement info of the target region installed.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested OpenMP directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives: use the directive's own num_teams
    // clause, if any.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct is associated with these directives: one team.
    return Bld.getInt32(1);
  // All remaining directive kinds fall out of the switch and hit the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6683 
6684 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6685                                   llvm::Value *DefaultThreadLimitVal) {
6686   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6687       CGF.getContext(), CS->getCapturedStmt());
6688   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6689     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6690       llvm::Value *NumThreads = nullptr;
6691       llvm::Value *CondVal = nullptr;
6692       // Handle if clause. If if clause present, the number of threads is
6693       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6694       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6695         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6696         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6697         const OMPIfClause *IfClause = nullptr;
6698         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6699           if (C->getNameModifier() == OMPD_unknown ||
6700               C->getNameModifier() == OMPD_parallel) {
6701             IfClause = C;
6702             break;
6703           }
6704         }
6705         if (IfClause) {
6706           const Expr *Cond = IfClause->getCondition();
6707           bool Result;
6708           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6709             if (!Result)
6710               return CGF.Builder.getInt32(1);
6711           } else {
6712             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6713             if (const auto *PreInit =
6714                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6715               for (const auto *I : PreInit->decls()) {
6716                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6717                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6718                 } else {
6719                   CodeGenFunction::AutoVarEmission Emission =
6720                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6721                   CGF.EmitAutoVarCleanups(Emission);
6722                 }
6723               }
6724             }
6725             CondVal = CGF.EvaluateExprAsBool(Cond);
6726           }
6727         }
6728       }
6729       // Check the value of num_threads clause iff if clause was not specified
6730       // or is not evaluated to false.
6731       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6732         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6733         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6734         const auto *NumThreadsClause =
6735             Dir->getSingleClause<OMPNumThreadsClause>();
6736         CodeGenFunction::LexicalScope Scope(
6737             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6738         if (const auto *PreInit =
6739                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6740           for (const auto *I : PreInit->decls()) {
6741             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6742               CGF.EmitVarDecl(cast<VarDecl>(*I));
6743             } else {
6744               CodeGenFunction::AutoVarEmission Emission =
6745                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6746               CGF.EmitAutoVarCleanups(Emission);
6747             }
6748           }
6749         }
6750         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6751         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6752                                                /*isSigned=*/false);
6753         if (DefaultThreadLimitVal)
6754           NumThreads = CGF.Builder.CreateSelect(
6755               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6756               DefaultThreadLimitVal, NumThreads);
6757       } else {
6758         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6759                                            : CGF.Builder.getInt32(0);
6760       }
6761       // Process condition of the if clause.
6762       if (CondVal) {
6763         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6764                                               CGF.Builder.getInt32(1));
6765       }
6766       return NumThreads;
6767     }
6768     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6769       return CGF.Builder.getInt32(1);
6770     return DefaultThreadLimitVal;
6771   }
6772   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6773                                : CGF.Builder.getInt32(0);
6774 }
6775 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param CGF Function in which the host-side code is emitted; must not be
///            compiling for the device (asserted).
/// \param D   Target-based executable directive (asserted).
/// \returns An i32 value. The constant 0 is returned when neither a thread
///          limit nor a thread count could be determined; the constant 1 is
///          returned for simd-only regions and for parallel regions whose
///          'if' clause folds to false. The result can be null only via the
///          'target teams distribute' case, which forwards the result of
///          getNumThreads() directly.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so getNumThreads() yields a value
    // unless the single child is an OpenMP directive that is neither
    // parallel nor simd; only that case falls through to the code below.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up the thread_limit clause of the nested directive, evaluating
      // it in the context of the target region's captured statement.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating it.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams directive that is not combined with distribute, step
      // into its single child (Dir may become null here).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // For a non-simd distribute directive, look for a parallel region
      // nested inside it and let it refine the thread limit.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd directive at this level means a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause sits on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Reached only when getNumThreads() returned null, i.e. no thread_limit
    // was found and the child is a non-parallel, non-simd OpenMP directive.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a plain nested 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result is forwarded as is, so it may be null when there is no
    // thread_limit clause and the child is a non-parallel, non-simd directive.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Use the first if clause that has no name modifier or names
      // 'parallel'.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: a single thread is used.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine both clauses as the unsigned minimum of num_threads and
      // thread_limit; with no thread_limit, num_threads is used directly.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause present: fall back to the constant 0.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the remaining kinds is handled; they all end up at the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7001 
7002 namespace {
7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7004 
7005 // Utility to handle information from clauses associated with a given
7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7007 // It provides a convenient interface to obtain the information and generate
7008 // code for that information.
7009 class MappableExprsHandler {
7010 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The underlying type is a 64-bit unsigned integer; the
  /// enumeration is marked as an LLVM bitmask enum so the flags can be
  /// combined with bitwise operators.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is reserved for compatibility with XLC and is deliberately
    // left unused here.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7058 
7059   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7060   static unsigned getFlagMemberOffset() {
7061     unsigned Offset = 0;
7062     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7063          Remain = Remain >> 1)
7064       Offset++;
7065     return Offset;
7066   }
7067 
7068   /// Class that holds debugging information for a data mapping to be passed to
7069   /// the runtime library.
7070   class MappingExprInfo {
7071     /// The variable declaration used for the data mapping.
7072     const ValueDecl *MapDecl = nullptr;
7073     /// The original expression used in the map clause, or null if there is
7074     /// none.
7075     const Expr *MapExpr = nullptr;
7076 
7077   public:
7078     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7079         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7080 
7081     const ValueDecl *getMapDecl() const { return MapDecl; }
7082     const Expr *getMapExpr() const { return MapExpr; }
7083   };
7084 
7085   /// Class that associates information with a base pointer to be passed to the
7086   /// runtime library.
7087   class BasePointerInfo {
7088     /// The base pointer.
7089     llvm::Value *Ptr = nullptr;
7090     /// The base declaration that refers to this device pointer, or null if
7091     /// there is none.
7092     const ValueDecl *DevPtrDecl = nullptr;
7093 
7094   public:
7095     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7096         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7097     llvm::Value *operator*() const { return Ptr; }
7098     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7099     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7100   };
7101 
7102   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7103   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7104   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7105   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7106   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7107   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7108   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7109 
7110   /// This structure contains combined information generated for mappable
7111   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7112   /// mappers, and non-contiguous information.
7113   struct MapCombinedInfoTy {
7114     struct StructNonContiguousInfo {
7115       bool IsNonContiguous = false;
7116       MapDimArrayTy Dims;
7117       MapNonContiguousArrayTy Offsets;
7118       MapNonContiguousArrayTy Counts;
7119       MapNonContiguousArrayTy Strides;
7120     };
7121     MapExprsArrayTy Exprs;
7122     MapBaseValuesArrayTy BasePointers;
7123     MapValuesArrayTy Pointers;
7124     MapValuesArrayTy Sizes;
7125     MapFlagsArrayTy Types;
7126     MapMappersArrayTy Mappers;
7127     StructNonContiguousInfo NonContigInfo;
7128 
7129     /// Append arrays in \a CurInfo.
7130     void append(MapCombinedInfoTy &CurInfo) {
7131       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7132       BasePointers.append(CurInfo.BasePointers.begin(),
7133                           CurInfo.BasePointers.end());
7134       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7135       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7136       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7137       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7138       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7139                                  CurInfo.NonContigInfo.Dims.end());
7140       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7141                                     CurInfo.NonContigInfo.Offsets.end());
7142       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7143                                    CurInfo.NonContigInfo.Counts.end());
7144       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7145                                     CurInfo.NonContigInfo.Strides.end());
7146     }
7147   };
7148 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// All addresses start out invalid and both flags start out false.
  struct StructRangeInfoTy {
    // Lowest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // NOTE(review): presumably the address of the enclosing struct itself;
    // confirm against the code that fills this in.
    Address Base = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7162 
7163 private:
7164   /// Kind that defines how a device pointer has to be returned.
7165   struct MapInfo {
7166     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7167     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7168     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7169     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7170     bool ReturnDevicePointer = false;
7171     bool IsImplicit = false;
7172     const ValueDecl *Mapper = nullptr;
7173     const Expr *VarRef = nullptr;
7174     bool ForDeviceAddr = false;
7175 
7176     MapInfo() = default;
7177     MapInfo(
7178         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7179         OpenMPMapClauseKind MapType,
7180         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7181         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7182         bool ReturnDevicePointer, bool IsImplicit,
7183         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7184         bool ForDeviceAddr = false)
7185         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7186           MotionModifiers(MotionModifiers),
7187           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7188           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7189   };
7190 
7191   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7192   /// member and there is no map information about it, then emission of that
7193   /// entry is deferred until the whole struct has been processed.
7194   struct DeferredDevicePtrEntryTy {
7195     const Expr *IE = nullptr;
7196     const ValueDecl *VD = nullptr;
7197     bool ForDeviceAddr = false;
7198 
7199     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7200                              bool ForDeviceAddr)
7201         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7202   };
7203 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7206   llvm::PointerUnion<const OMPExecutableDirective *,
7207                      const OMPDeclareMapperDecl *>
7208       CurDir;
7209 
7210   /// Function the directive is being generated for.
7211   CodeGenFunction &CGF;
7212 
7213   /// Set of all first private variables in the current directive.
7214   /// bool data is set to true if the variable is implicitly marked as
7215   /// firstprivate, false otherwise.
7216   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7217 
7218   /// Map between device pointer declarations and their expression components.
7219   /// The key value for declarations in 'this' is null.
7220   llvm::DenseMap<
7221       const ValueDecl *,
7222       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7223       DevPointersMap;
7224 
7225   llvm::Value *getExprTypeSize(const Expr *E) const {
7226     QualType ExprTy = E->getType().getCanonicalType();
7227 
7228     // Calculate the size for array shaping expression.
7229     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7230       llvm::Value *Size =
7231           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7232       for (const Expr *SE : OAE->getDimensions()) {
7233         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7234         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7235                                       CGF.getContext().getSizeType(),
7236                                       SE->getExprLoc());
7237         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7238       }
7239       return Size;
7240     }
7241 
7242     // Reference types are ignored for mapping purposes.
7243     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7244       ExprTy = RefTy->getPointeeType().getCanonicalType();
7245 
7246     // Given that an array section is considered a built-in type, we need to
7247     // do the calculation based on the length of the section instead of relying
7248     // on CGF.getTypeSize(E->getType()).
7249     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7250       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7251                             OAE->getBase()->IgnoreParenImpCasts())
7252                             .getCanonicalType();
7253 
7254       // If there is no length associated with the expression and lower bound is
7255       // not specified too, that means we are using the whole length of the
7256       // base.
7257       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7258           !OAE->getLowerBound())
7259         return CGF.getTypeSize(BaseTy);
7260 
7261       llvm::Value *ElemSize;
7262       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7263         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7264       } else {
7265         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7266         assert(ATy && "Expecting array type if not a pointer type.");
7267         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7268       }
7269 
7270       // If we don't have a length at this point, that is because we have an
7271       // array section with a single element.
7272       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7273         return ElemSize;
7274 
7275       if (const Expr *LenExpr = OAE->getLength()) {
7276         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7277         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7278                                              CGF.getContext().getSizeType(),
7279                                              LenExpr->getExprLoc());
7280         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7281       }
7282       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7283              OAE->getLowerBound() && "expected array_section[lb:].");
7284       // Size = sizetype - lb * elemtype;
7285       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7286       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7287       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7288                                        CGF.getContext().getSizeType(),
7289                                        OAE->getLowerBound()->getExprLoc());
7290       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7291       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7292       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7293       LengthVal = CGF.Builder.CreateSelect(
7294           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7295       return LengthVal;
7296     }
7297     return CGF.getTypeSize(ExprTy);
7298   }
7299 
7300   /// Return the corresponding bits for a given map clause modifier. Add
7301   /// a flag marking the map as a pointer if requested. Add a flag marking the
7302   /// map as the first one of a series of maps that relate to the same map
7303   /// expression.
7304   OpenMPOffloadMappingFlags getMapTypeBits(
7305       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7306       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7307       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7308     OpenMPOffloadMappingFlags Bits =
7309         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7310     switch (MapType) {
7311     case OMPC_MAP_alloc:
7312     case OMPC_MAP_release:
7313       // alloc and release is the default behavior in the runtime library,  i.e.
7314       // if we don't pass any bits alloc/release that is what the runtime is
7315       // going to do. Therefore, we don't need to signal anything for these two
7316       // type modifiers.
7317       break;
7318     case OMPC_MAP_to:
7319       Bits |= OMP_MAP_TO;
7320       break;
7321     case OMPC_MAP_from:
7322       Bits |= OMP_MAP_FROM;
7323       break;
7324     case OMPC_MAP_tofrom:
7325       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7326       break;
7327     case OMPC_MAP_delete:
7328       Bits |= OMP_MAP_DELETE;
7329       break;
7330     case OMPC_MAP_unknown:
7331       llvm_unreachable("Unexpected map type!");
7332     }
7333     if (AddPtrFlag)
7334       Bits |= OMP_MAP_PTR_AND_OBJ;
7335     if (AddIsTargetParamFlag)
7336       Bits |= OMP_MAP_TARGET_PARAM;
7337     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7338         != MapModifiers.end())
7339       Bits |= OMP_MAP_ALWAYS;
7340     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7341         != MapModifiers.end())
7342       Bits |= OMP_MAP_CLOSE;
7343     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7344             MapModifiers.end() ||
7345         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7346             MotionModifiers.end())
7347       Bits |= OMP_MAP_PRESENT;
7348     if (IsNonContiguous)
7349       Bits |= OMP_MAP_NON_CONTIG;
7350     return Bits;
7351   }
7352 
7353   /// Return true if the provided expression is a final array section. A
7354   /// final array section, is one whose length can't be proved to be one.
7355   bool isFinalArraySectionExpression(const Expr *E) const {
7356     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7357 
7358     // It is not an array section and therefore not a unity-size one.
7359     if (!OASE)
7360       return false;
7361 
7362     // An array section with no colon always refer to a single element.
7363     if (OASE->getColonLocFirst().isInvalid())
7364       return false;
7365 
7366     const Expr *Length = OASE->getLength();
7367 
7368     // If we don't have a length we have to check if the array has size 1
7369     // for this dimension. Also, we should always expect a length if the
7370     // base type is pointer.
7371     if (!Length) {
7372       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7373                              OASE->getBase()->IgnoreParenImpCasts())
7374                              .getCanonicalType();
7375       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7376         return ATy->getSize().getSExtValue() != 1;
7377       // If we don't have a constant dimension length, we have to consider
7378       // the current section as having any size, so it is not necessarily
7379       // unitary. If it happen to be unity size, that's user fault.
7380       return true;
7381     }
7382 
7383     // Check if the length evaluates to 1.
7384     Expr::EvalResult Result;
7385     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7386       return true; // Can have more that size 1.
7387 
7388     llvm::APSInt ConstLength = Result.Val.getInt();
7389     return ConstLength.getSExtValue() != 1;
7390   }
7391 
7392   /// Generate the base pointers, section pointers, sizes, map type bits, and
7393   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7394   /// map type, map or motion modifiers, and expression components.
7395   /// \a IsFirstComponentList should be set to true if the provided set of
7396   /// components is the first associated with a capture.
7397   void generateInfoForComponentList(
7398       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7399       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7400       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7401       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7402       bool IsFirstComponentList, bool IsImplicit,
7403       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7404       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7405       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7406           OverlappedElements = llvm::None) const {
7407     // The following summarizes what has to be generated for each map and the
7408     // types below. The generated information is expressed in this order:
7409     // base pointer, section pointer, size, flags
7410     // (to add to the ones that come from the map type and modifier).
7411     //
7412     // double d;
7413     // int i[100];
7414     // float *p;
7415     //
7416     // struct S1 {
7417     //   int i;
7418     //   float f[50];
7419     // }
7420     // struct S2 {
7421     //   int i;
7422     //   float f[50];
7423     //   S1 s;
7424     //   double *p;
7425     //   struct S2 *ps;
7426     // }
7427     // S2 s;
7428     // S2 *ps;
7429     //
7430     // map(d)
7431     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7432     //
7433     // map(i)
7434     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7435     //
7436     // map(i[1:23])
7437     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7438     //
7439     // map(p)
7440     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7441     //
7442     // map(p[1:24])
7443     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7444     // in unified shared memory mode or for local pointers
7445     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7446     //
7447     // map(s)
7448     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7449     //
7450     // map(s.i)
7451     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7452     //
7453     // map(s.s.f)
7454     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7455     //
7456     // map(s.p)
7457     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7458     //
7459     // map(to: s.p[:22])
7460     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7461     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7462     // &(s.p), &(s.p[0]), 22*sizeof(double),
7463     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7464     // (*) alloc space for struct members, only this is a target parameter
7465     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7466     //      optimizes this entry out, same in the examples below)
7467     // (***) map the pointee (map: to)
7468     //
7469     // map(s.ps)
7470     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7471     //
7472     // map(from: s.ps->s.i)
7473     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7474     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7475     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7476     //
7477     // map(to: s.ps->ps)
7478     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7479     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7480     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7481     //
7482     // map(s.ps->ps->ps)
7483     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7484     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7485     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7486     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7487     //
7488     // map(to: s.ps->ps->s.f[:22])
7489     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7490     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7491     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7492     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7493     //
7494     // map(ps)
7495     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7496     //
7497     // map(ps->i)
7498     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7499     //
7500     // map(ps->s.f)
7501     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7502     //
7503     // map(from: ps->p)
7504     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7505     //
7506     // map(to: ps->p[:22])
7507     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7508     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7509     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7510     //
7511     // map(ps->ps)
7512     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7513     //
7514     // map(from: ps->ps->s.i)
7515     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7516     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7517     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7518     //
7519     // map(from: ps->ps->ps)
7520     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7521     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7522     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7523     //
7524     // map(ps->ps->ps->ps)
7525     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7526     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7527     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7528     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7529     //
7530     // map(to: ps->ps->ps->s.f[:22])
7531     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7532     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7533     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7534     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7535     //
7536     // map(to: s.f[:22]) map(from: s.p[:33])
7537     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7538     //     sizeof(double*) (*), TARGET_PARAM
7539     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7540     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7541     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7542     // (*) allocate contiguous space needed to fit all mapped members even if
7543     //     we allocate space for members not mapped (in this example,
7544     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7545     //     them as well because they fall between &s.f[0] and &s.p)
7546     //
7547     // map(from: s.f[:22]) map(to: ps->p[:33])
7548     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7549     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7550     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7551     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7552     // (*) the struct this entry pertains to is the 2nd element in the list of
7553     //     arguments, hence MEMBER_OF(2)
7554     //
7555     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7556     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7557     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7558     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7559     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7560     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7561     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7562     // (*) the struct this entry pertains to is the 4th element in the list
7563     //     of arguments, hence MEMBER_OF(4)
7564 
7565     // Track if the map information being generated is the first for a capture.
7566     bool IsCaptureFirstInfo = IsFirstComponentList;
7567     // When the variable is on a declare target link or in a to clause with
7568     // unified memory, a reference is needed to hold the host/device address
7569     // of the variable.
7570     bool RequiresReference = false;
7571 
7572     // Scan the components from the base to the complete expression.
7573     auto CI = Components.rbegin();
7574     auto CE = Components.rend();
7575     auto I = CI;
7576 
7577     // Track if the map information being generated is the first for a list of
7578     // components.
7579     bool IsExpressionFirstInfo = true;
7580     bool FirstPointerInComplexData = false;
7581     Address BP = Address::invalid();
7582     const Expr *AssocExpr = I->getAssociatedExpression();
7583     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7584     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7585     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7586 
7587     if (isa<MemberExpr>(AssocExpr)) {
7588       // The base is the 'this' pointer. The content of the pointer is going
7589       // to be the base of the field being mapped.
7590       BP = CGF.LoadCXXThisAddress();
7591     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7592                (OASE &&
7593                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7594       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7595     } else if (OAShE &&
7596                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7597       BP = Address(
7598           CGF.EmitScalarExpr(OAShE->getBase()),
7599           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7600     } else {
7601       // The base is the reference to the variable.
7602       // BP = &Var.
7603       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7604       if (const auto *VD =
7605               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7606         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7607                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7608           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7609               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7610                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7611             RequiresReference = true;
7612             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7613           }
7614         }
7615       }
7616 
7617       // If the variable is a pointer and is being dereferenced (i.e. is not
7618       // the last component), the base has to be the pointer itself, not its
7619       // reference. References are ignored for mapping purposes.
7620       QualType Ty =
7621           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7622       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7623         // No need to generate individual map information for the pointer, it
7624         // can be associated with the combined storage if shared memory mode is
7625         // active or the base declaration is not global variable.
7626         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7627         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7628             !VD || VD->hasLocalStorage())
7629           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7630         else
7631           FirstPointerInComplexData = true;
7632         ++I;
7633       }
7634     }
7635 
7636     // Track whether a component of the list should be marked as MEMBER_OF some
7637     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7638     // in a component list should be marked as MEMBER_OF, all subsequent entries
7639     // do not belong to the base struct. E.g.
7640     // struct S2 s;
7641     // s.ps->ps->ps->f[:]
7642     //   (1) (2) (3) (4)
7643     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7644     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7645     // is the pointee of ps(2) which is not member of struct s, so it should not
7646     // be marked as such (it is still PTR_AND_OBJ).
7647     // The variable is initialized to false so that PTR_AND_OBJ entries which
7648     // are not struct members are not considered (e.g. array of pointers to
7649     // data).
7650     bool ShouldBeMemberOf = false;
7651 
7652     // Variable keeping track of whether or not we have encountered a component
7653     // in the component list which is a member expression. Useful when we have a
7654     // pointer or a final array section, in which case it is the previous
7655     // component in the list which tells us whether we have a member expression.
7656     // E.g. X.f[:]
7657     // While processing the final array section "[:]" it is "f" which tells us
7658     // whether we are dealing with a member of a declared struct.
7659     const MemberExpr *EncounteredME = nullptr;
7660 
7661     // Track for the total number of dimension. Start from one for the dummy
7662     // dimension.
7663     uint64_t DimSize = 1;
7664 
7665     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7666 
7667     for (; I != CE; ++I) {
7668       // If the current component is member of a struct (parent struct) mark it.
7669       if (!EncounteredME) {
7670         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7671         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7672         // as MEMBER_OF the parent struct.
7673         if (EncounteredME) {
7674           ShouldBeMemberOf = true;
7675           // Do not emit as complex pointer if this is actually not array-like
7676           // expression.
7677           if (FirstPointerInComplexData) {
7678             QualType Ty = std::prev(I)
7679                               ->getAssociatedDeclaration()
7680                               ->getType()
7681                               .getNonReferenceType();
7682             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7683             FirstPointerInComplexData = false;
7684           }
7685         }
7686       }
7687 
7688       auto Next = std::next(I);
7689 
7690       // We need to generate the addresses and sizes if this is the last
7691       // component, if the component is a pointer or if it is an array section
7692       // whose length can't be proved to be one. If this is a pointer, it
7693       // becomes the base address for the following components.
7694 
7695       // A final array section, is one whose length can't be proved to be one.
7696       // If the map item is non-contiguous then we don't treat any array section
7697       // as final array section.
7698       bool IsFinalArraySection =
7699           !IsNonContiguous &&
7700           isFinalArraySectionExpression(I->getAssociatedExpression());
7701 
7702       // If we have a declaration for the mapping use that, otherwise use
7703       // the base declaration of the map clause.
7704       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7705                                      ? I->getAssociatedDeclaration()
7706                                      : BaseDecl;
7707 
7708       // Get information on whether the element is a pointer. Have to do a
7709       // special treatment for array sections given that they are built-in
7710       // types.
7711       const auto *OASE =
7712           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7713       const auto *OAShE =
7714           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7715       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7716       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7717       bool IsPointer =
7718           OAShE ||
7719           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7720                        .getCanonicalType()
7721                        ->isAnyPointerType()) ||
7722           I->getAssociatedExpression()->getType()->isAnyPointerType();
7723       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7724 
7725       if (OASE)
7726         ++DimSize;
7727 
7728       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7729         // If this is not the last component, we expect the pointer to be
7730         // associated with an array expression or member expression.
7731         assert((Next == CE ||
7732                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7733                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7734                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7735                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7736                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7737                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7738                "Unexpected expression");
7739 
7740         Address LB = Address::invalid();
7741         if (OAShE) {
7742           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7743                        CGF.getContext().getTypeAlignInChars(
7744                            OAShE->getBase()->getType()));
7745         } else {
7746           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7747                    .getAddress(CGF);
7748         }
7749 
7750         // If this component is a pointer inside the base struct then we don't
7751         // need to create any entry for it - it will be combined with the object
7752         // it is pointing to into a single PTR_AND_OBJ entry.
7753         bool IsMemberPointerOrAddr =
7754             (IsPointer || ForDeviceAddr) && EncounteredME &&
7755             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7756              EncounteredME);
7757         if (!OverlappedElements.empty()) {
7758           // Handle base element with the info for overlapped elements.
7759           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7760           assert(Next == CE &&
7761                  "Expected last element for the overlapped elements.");
7762           assert(!IsPointer &&
7763                  "Unexpected base element with the pointer type.");
7764           // Mark the whole struct as the struct that requires allocation on the
7765           // device.
7766           PartialStruct.LowestElem = {0, LB};
7767           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7768               I->getAssociatedExpression()->getType());
7769           Address HB = CGF.Builder.CreateConstGEP(
7770               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7771                                                               CGF.VoidPtrTy),
7772               TypeSize.getQuantity() - 1);
7773           PartialStruct.HighestElem = {
7774               std::numeric_limits<decltype(
7775                   PartialStruct.HighestElem.first)>::max(),
7776               HB};
7777           PartialStruct.Base = BP;
7778           // Emit data for non-overlapped data.
7779           OpenMPOffloadMappingFlags Flags =
7780               OMP_MAP_MEMBER_OF |
7781               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7782                              /*AddPtrFlag=*/false,
7783                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7784           LB = BP;
7785           llvm::Value *Size = nullptr;
7786           // Do bitcopy of all non-overlapped structure elements.
7787           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7788                    Component : OverlappedElements) {
7789             Address ComponentLB = Address::invalid();
7790             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7791                  Component) {
7792               if (MC.getAssociatedDeclaration()) {
7793                 ComponentLB =
7794                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7795                         .getAddress(CGF);
7796                 Size = CGF.Builder.CreatePtrDiff(
7797                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7798                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7799                 break;
7800               }
7801             }
7802             assert(Size && "Failed to determine structure size");
7803             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7804             CombinedInfo.BasePointers.push_back(BP.getPointer());
7805             CombinedInfo.Pointers.push_back(LB.getPointer());
7806             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7807                 Size, CGF.Int64Ty, /*isSigned=*/true));
7808             CombinedInfo.Types.push_back(Flags);
7809             CombinedInfo.Mappers.push_back(nullptr);
7810             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7811                                                                       : 1);
7812             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7813           }
7814           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7815           CombinedInfo.BasePointers.push_back(BP.getPointer());
7816           CombinedInfo.Pointers.push_back(LB.getPointer());
7817           Size = CGF.Builder.CreatePtrDiff(
7818               CGF.EmitCastToVoidPtr(
7819                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7820               CGF.EmitCastToVoidPtr(LB.getPointer()));
7821           CombinedInfo.Sizes.push_back(
7822               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7823           CombinedInfo.Types.push_back(Flags);
7824           CombinedInfo.Mappers.push_back(nullptr);
7825           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7826                                                                     : 1);
7827           break;
7828         }
7829         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7830         if (!IsMemberPointerOrAddr ||
7831             (Next == CE && MapType != OMPC_MAP_unknown)) {
7832           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7833           CombinedInfo.BasePointers.push_back(BP.getPointer());
7834           CombinedInfo.Pointers.push_back(LB.getPointer());
7835           CombinedInfo.Sizes.push_back(
7836               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7837           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7838                                                                     : 1);
7839 
7840           // If Mapper is valid, the last component inherits the mapper.
7841           bool HasMapper = Mapper && Next == CE;
7842           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7843 
7844           // We need to add a pointer flag for each map that comes from the
7845           // same expression except for the first one. We also need to signal
7846           // this map is the first one that relates with the current capture
7847           // (there is a set of entries for each capture).
7848           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7849               MapType, MapModifiers, MotionModifiers, IsImplicit,
7850               !IsExpressionFirstInfo || RequiresReference ||
7851                   FirstPointerInComplexData,
7852               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7853 
7854           if (!IsExpressionFirstInfo) {
7855             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7856             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7857             if (IsPointer)
7858               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7859                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7860 
7861             if (ShouldBeMemberOf) {
7862               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7863               // should be later updated with the correct value of MEMBER_OF.
7864               Flags |= OMP_MAP_MEMBER_OF;
7865               // From now on, all subsequent PTR_AND_OBJ entries should not be
7866               // marked as MEMBER_OF.
7867               ShouldBeMemberOf = false;
7868             }
7869           }
7870 
7871           CombinedInfo.Types.push_back(Flags);
7872         }
7873 
7874         // If we have encountered a member expression so far, keep track of the
7875         // mapped member. If the parent is "*this", then the value declaration
7876         // is nullptr.
7877         if (EncounteredME) {
7878           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7879           unsigned FieldIndex = FD->getFieldIndex();
7880 
7881           // Update info about the lowest and highest elements for this struct
7882           if (!PartialStruct.Base.isValid()) {
7883             PartialStruct.LowestElem = {FieldIndex, LB};
7884             if (IsFinalArraySection) {
7885               Address HB =
7886                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7887                       .getAddress(CGF);
7888               PartialStruct.HighestElem = {FieldIndex, HB};
7889             } else {
7890               PartialStruct.HighestElem = {FieldIndex, LB};
7891             }
7892             PartialStruct.Base = BP;
7893           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7894             PartialStruct.LowestElem = {FieldIndex, LB};
7895           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7896             PartialStruct.HighestElem = {FieldIndex, LB};
7897           }
7898         }
7899 
7900         // Need to emit combined struct for array sections.
7901         if (IsFinalArraySection || IsNonContiguous)
7902           PartialStruct.IsArraySection = true;
7903 
7904         // If we have a final array section, we are done with this expression.
7905         if (IsFinalArraySection)
7906           break;
7907 
7908         // The pointer becomes the base for the next element.
7909         if (Next != CE)
7910           BP = LB;
7911 
7912         IsExpressionFirstInfo = false;
7913         IsCaptureFirstInfo = false;
7914         FirstPointerInComplexData = false;
7915       } else if (FirstPointerInComplexData) {
7916         QualType Ty = Components.rbegin()
7917                           ->getAssociatedDeclaration()
7918                           ->getType()
7919                           .getNonReferenceType();
7920         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7921         FirstPointerInComplexData = false;
7922       }
7923     }
7924     // If ran into the whole component - allocate the space for the whole
7925     // record.
7926     if (!EncounteredME)
7927       PartialStruct.HasCompleteRecord = true;
7928 
7929     if (!IsNonContiguous)
7930       return;
7931 
7932     const ASTContext &Context = CGF.getContext();
7933 
7934     // For supporting stride in array section, we need to initialize the first
7935     // dimension size as 1, first offset as 0, and first count as 1
7936     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7937     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7938     MapValuesArrayTy CurStrides;
7939     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7940     uint64_t ElementTypeSize;
7941 
7942     // Collect Size information for each dimension and get the element size as
7943     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7944     // should be [10, 10] and the first stride is 4 bytes.
7945     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7946          Components) {
7947       const Expr *AssocExpr = Component.getAssociatedExpression();
7948       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7949 
7950       if (!OASE)
7951         continue;
7952 
7953       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7954       auto *CAT = Context.getAsConstantArrayType(Ty);
7955       auto *VAT = Context.getAsVariableArrayType(Ty);
7956 
7957       // We need all the dimension size except for the last dimension.
7958       assert((VAT || CAT || &Component == &*Components.begin()) &&
7959              "Should be either ConstantArray or VariableArray if not the "
7960              "first Component");
7961 
7962       // Get element size if CurStrides is empty.
7963       if (CurStrides.empty()) {
7964         const Type *ElementType = nullptr;
7965         if (CAT)
7966           ElementType = CAT->getElementType().getTypePtr();
7967         else if (VAT)
7968           ElementType = VAT->getElementType().getTypePtr();
7969         else
7970           assert(&Component == &*Components.begin() &&
7971                  "Only expect pointer (non CAT or VAT) when this is the "
7972                  "first Component");
7973         // If ElementType is null, then it means the base is a pointer
7974         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7975         // for next iteration.
7976         if (ElementType) {
7977           // For the case that having pointer as base, we need to remove one
7978           // level of indirection.
7979           if (&Component != &*Components.begin())
7980             ElementType = ElementType->getPointeeOrArrayElementType();
7981           ElementTypeSize =
7982               Context.getTypeSizeInChars(ElementType).getQuantity();
7983           CurStrides.push_back(
7984               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7985         }
7986       }
7987       // Get dimension value except for the last dimension since we don't need
7988       // it.
7989       if (DimSizes.size() < Components.size() - 1) {
7990         if (CAT)
7991           DimSizes.push_back(llvm::ConstantInt::get(
7992               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7993         else if (VAT)
7994           DimSizes.push_back(CGF.Builder.CreateIntCast(
7995               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7996               /*IsSigned=*/false));
7997       }
7998     }
7999 
8000     // Skip the dummy dimension since we have already have its information.
8001     auto DI = DimSizes.begin() + 1;
8002     // Product of dimension.
8003     llvm::Value *DimProd =
8004         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8005 
8006     // Collect info for non-contiguous. Notice that offset, count, and stride
8007     // are only meaningful for array-section, so we insert a null for anything
8008     // other than array-section.
8009     // Also, the size of offset, count, and stride are not the same as
8010     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8011     // count, and stride are the same as the number of non-contiguous
8012     // declaration in target update to/from clause.
8013     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8014          Components) {
8015       const Expr *AssocExpr = Component.getAssociatedExpression();
8016 
8017       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8018         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8019             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8020             /*isSigned=*/false);
8021         CurOffsets.push_back(Offset);
8022         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8023         CurStrides.push_back(CurStrides.back());
8024         continue;
8025       }
8026 
8027       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8028 
8029       if (!OASE)
8030         continue;
8031 
8032       // Offset
8033       const Expr *OffsetExpr = OASE->getLowerBound();
8034       llvm::Value *Offset = nullptr;
8035       if (!OffsetExpr) {
8036         // If offset is absent, then we just set it to zero.
8037         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8038       } else {
8039         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8040                                            CGF.Int64Ty,
8041                                            /*isSigned=*/false);
8042       }
8043       CurOffsets.push_back(Offset);
8044 
8045       // Count
8046       const Expr *CountExpr = OASE->getLength();
8047       llvm::Value *Count = nullptr;
8048       if (!CountExpr) {
8049         // In Clang, once a high dimension is an array section, we construct all
8050         // the lower dimension as array section, however, for case like
8051         // arr[0:2][2], Clang construct the inner dimension as an array section
8052         // but it actually is not in an array section form according to spec.
8053         if (!OASE->getColonLocFirst().isValid() &&
8054             !OASE->getColonLocSecond().isValid()) {
8055           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8056         } else {
8057           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8058           // When the length is absent it defaults to ⌈(size −
8059           // lower-bound)/stride⌉, where size is the size of the array
8060           // dimension.
8061           const Expr *StrideExpr = OASE->getStride();
8062           llvm::Value *Stride =
8063               StrideExpr
8064                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8065                                               CGF.Int64Ty, /*isSigned=*/false)
8066                   : nullptr;
8067           if (Stride)
8068             Count = CGF.Builder.CreateUDiv(
8069                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8070           else
8071             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8072         }
8073       } else {
8074         Count = CGF.EmitScalarExpr(CountExpr);
8075       }
8076       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8077       CurCounts.push_back(Count);
8078 
8079       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8080       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8081       //              Offset      Count     Stride
8082       //    D0          0           1         4    (int)    <- dummy dimension
8083       //    D1          0           2         8    (2 * (1) * 4)
8084       //    D2          1           2         20   (1 * (1 * 5) * 4)
8085       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8086       const Expr *StrideExpr = OASE->getStride();
8087       llvm::Value *Stride =
8088           StrideExpr
8089               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8090                                           CGF.Int64Ty, /*isSigned=*/false)
8091               : nullptr;
8092       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8093       if (Stride)
8094         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8095       else
8096         CurStrides.push_back(DimProd);
8097       if (DI != DimSizes.end())
8098         ++DI;
8099     }
8100 
8101     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8102     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8103     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8104   }
8105 
8106   /// Return the adjusted map modifiers if the declaration a capture refers to
8107   /// appears in a first-private clause. This is expected to be used only with
8108   /// directives that start with 'target'.
8109   MappableExprsHandler::OpenMPOffloadMappingFlags
8110   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8111     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8112 
8113     // A first private variable captured by reference will use only the
8114     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8115     // declaration is known as first-private in this handler.
8116     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8117       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8118           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8119         return MappableExprsHandler::OMP_MAP_ALWAYS |
8120                MappableExprsHandler::OMP_MAP_TO;
8121       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8122         return MappableExprsHandler::OMP_MAP_TO |
8123                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8124       return MappableExprsHandler::OMP_MAP_PRIVATE |
8125              MappableExprsHandler::OMP_MAP_TO;
8126     }
8127     return MappableExprsHandler::OMP_MAP_TO |
8128            MappableExprsHandler::OMP_MAP_FROM;
8129   }
8130 
8131   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8132     // Rotate by getFlagMemberOffset() bits.
8133     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8134                                                   << getFlagMemberOffset());
8135   }
8136 
8137   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8138                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8139     // If the entry is PTR_AND_OBJ but has not been marked with the special
8140     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8141     // marked as MEMBER_OF.
8142     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8143         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8144       return;
8145 
8146     // Reset the placeholder value to prepare the flag for the assignment of the
8147     // proper MEMBER_OF value.
8148     Flags &= ~OMP_MAP_MEMBER_OF;
8149     Flags |= MemberOfFlag;
8150   }
8151 
8152   void getPlainLayout(const CXXRecordDecl *RD,
8153                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8154                       bool AsBase) const {
8155     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8156 
8157     llvm::StructType *St =
8158         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8159 
8160     unsigned NumElements = St->getNumElements();
8161     llvm::SmallVector<
8162         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8163         RecordLayout(NumElements);
8164 
8165     // Fill bases.
8166     for (const auto &I : RD->bases()) {
8167       if (I.isVirtual())
8168         continue;
8169       const auto *Base = I.getType()->getAsCXXRecordDecl();
8170       // Ignore empty bases.
8171       if (Base->isEmpty() || CGF.getContext()
8172                                  .getASTRecordLayout(Base)
8173                                  .getNonVirtualSize()
8174                                  .isZero())
8175         continue;
8176 
8177       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8178       RecordLayout[FieldIndex] = Base;
8179     }
8180     // Fill in virtual bases.
8181     for (const auto &I : RD->vbases()) {
8182       const auto *Base = I.getType()->getAsCXXRecordDecl();
8183       // Ignore empty bases.
8184       if (Base->isEmpty())
8185         continue;
8186       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8187       if (RecordLayout[FieldIndex])
8188         continue;
8189       RecordLayout[FieldIndex] = Base;
8190     }
8191     // Fill in all the fields.
8192     assert(!RD->isUnion() && "Unexpected union.");
8193     for (const auto *Field : RD->fields()) {
8194       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8195       // will fill in later.)
8196       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8197         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8198         RecordLayout[FieldIndex] = Field;
8199       }
8200     }
8201     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8202              &Data : RecordLayout) {
8203       if (Data.isNull())
8204         continue;
8205       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8206         getPlainLayout(Base, Layout, /*AsBase=*/true);
8207       else
8208         Layout.push_back(Data.get<const FieldDecl *>());
8209     }
8210   }
8211 
8212   /// Generate all the base pointers, section pointers, sizes, map types, and
8213   /// mappers for the extracted mappable expressions (all included in \a
8214   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8215   /// pair of the relevant declaration and index where it occurs is appended to
8216   /// the device pointers info array.
8217   void generateAllInfoForClauses(
8218       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8219       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8220           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8221     // We have to process the component lists that relate with the same
8222     // declaration in a single chunk so that we can generate the map flags
8223     // correctly. Therefore, we organize all lists in a map.
8224     enum MapKind { Present, Allocs, Other, Total };
8225     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8226                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8227         Info;
8228 
8229     // Helper function to fill the information map for the different supported
8230     // clauses.
8231     auto &&InfoGen =
8232         [&Info, &SkipVarSet](
8233             const ValueDecl *D, MapKind Kind,
8234             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8235             OpenMPMapClauseKind MapType,
8236             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8237             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8238             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8239             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8240           if (SkipVarSet.contains(D))
8241             return;
8242           auto It = Info.find(D);
8243           if (It == Info.end())
8244             It = Info
8245                      .insert(std::make_pair(
8246                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8247                      .first;
8248           It->second[Kind].emplace_back(
8249               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8250               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8251         };
8252 
8253     for (const auto *Cl : Clauses) {
8254       const auto *C = dyn_cast<OMPMapClause>(Cl);
8255       if (!C)
8256         continue;
8257       MapKind Kind = Other;
8258       if (!C->getMapTypeModifiers().empty() &&
8259           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8260             return K == OMPC_MAP_MODIFIER_present;
8261           }))
8262         Kind = Present;
8263       else if (C->getMapType() == OMPC_MAP_alloc)
8264         Kind = Allocs;
8265       const auto *EI = C->getVarRefs().begin();
8266       for (const auto L : C->component_lists()) {
8267         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8268         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8269                 C->getMapTypeModifiers(), llvm::None,
8270                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8271                 E);
8272         ++EI;
8273       }
8274     }
8275     for (const auto *Cl : Clauses) {
8276       const auto *C = dyn_cast<OMPToClause>(Cl);
8277       if (!C)
8278         continue;
8279       MapKind Kind = Other;
8280       if (!C->getMotionModifiers().empty() &&
8281           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8282             return K == OMPC_MOTION_MODIFIER_present;
8283           }))
8284         Kind = Present;
8285       const auto *EI = C->getVarRefs().begin();
8286       for (const auto L : C->component_lists()) {
8287         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8288                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8289                 C->isImplicit(), std::get<2>(L), *EI);
8290         ++EI;
8291       }
8292     }
8293     for (const auto *Cl : Clauses) {
8294       const auto *C = dyn_cast<OMPFromClause>(Cl);
8295       if (!C)
8296         continue;
8297       MapKind Kind = Other;
8298       if (!C->getMotionModifiers().empty() &&
8299           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8300             return K == OMPC_MOTION_MODIFIER_present;
8301           }))
8302         Kind = Present;
8303       const auto *EI = C->getVarRefs().begin();
8304       for (const auto L : C->component_lists()) {
8305         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8306                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8307                 C->isImplicit(), std::get<2>(L), *EI);
8308         ++EI;
8309       }
8310     }
8311 
8312     // Look at the use_device_ptr clause information and mark the existing map
8313     // entries as such. If there is no map information for an entry in the
8314     // use_device_ptr list, we create one with map type 'alloc' and zero size
8315     // section. It is the user fault if that was not mapped before. If there is
8316     // no map information and the pointer is a struct member, then we defer the
8317     // emission of that entry until the whole struct has been processed.
8318     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8319                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8320         DeferredInfo;
8321     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8322 
8323     for (const auto *Cl : Clauses) {
8324       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8325       if (!C)
8326         continue;
8327       for (const auto L : C->component_lists()) {
8328         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8329             std::get<1>(L);
8330         assert(!Components.empty() &&
8331                "Not expecting empty list of components!");
8332         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8333         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8334         const Expr *IE = Components.back().getAssociatedExpression();
8335         // If the first component is a member expression, we have to look into
8336         // 'this', which maps to null in the map of map information. Otherwise
8337         // look directly for the information.
8338         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8339 
8340         // We potentially have map information for this declaration already.
8341         // Look for the first set of components that refer to it.
8342         if (It != Info.end()) {
8343           bool Found = false;
8344           for (auto &Data : It->second) {
8345             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8346               return MI.Components.back().getAssociatedDeclaration() == VD;
8347             });
8348             // If we found a map entry, signal that the pointer has to be
8349             // returned and move on to the next declaration. Exclude cases where
8350             // the base pointer is mapped as array subscript, array section or
8351             // array shaping. The base address is passed as a pointer to base in
8352             // this case and cannot be used as a base for use_device_ptr list
8353             // item.
8354             if (CI != Data.end()) {
8355               auto PrevCI = std::next(CI->Components.rbegin());
8356               const auto *VarD = dyn_cast<VarDecl>(VD);
8357               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8358                   isa<MemberExpr>(IE) ||
8359                   !VD->getType().getNonReferenceType()->isPointerType() ||
8360                   PrevCI == CI->Components.rend() ||
8361                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8362                   VarD->hasLocalStorage()) {
8363                 CI->ReturnDevicePointer = true;
8364                 Found = true;
8365                 break;
8366               }
8367             }
8368           }
8369           if (Found)
8370             continue;
8371         }
8372 
8373         // We didn't find any match in our map information - generate a zero
8374         // size array section - if the pointer is a struct member we defer this
8375         // action until the whole struct has been processed.
8376         if (isa<MemberExpr>(IE)) {
8377           // Insert the pointer into Info to be processed by
8378           // generateInfoForComponentList. Because it is a member pointer
8379           // without a pointee, no entry will be generated for it, therefore
8380           // we need to generate one after the whole struct has been processed.
8381           // Nonetheless, generateInfoForComponentList must be called to take
8382           // the pointer into account for the calculation of the range of the
8383           // partial struct.
8384           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8385                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8386                   nullptr);
8387           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8388         } else {
8389           llvm::Value *Ptr =
8390               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8391           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8392           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8393           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8394           UseDevicePtrCombinedInfo.Sizes.push_back(
8395               llvm::Constant::getNullValue(CGF.Int64Ty));
8396           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8397           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8398         }
8399       }
8400     }
8401 
8402     // Look at the use_device_addr clause information and mark the existing map
8403     // entries as such. If there is no map information for an entry in the
8404     // use_device_addr list, we create one with map type 'alloc' and zero size
8405     // section. It is the user fault if that was not mapped before. If there is
8406     // no map information and the pointer is a struct member, then we defer the
8407     // emission of that entry until the whole struct has been processed.
8408     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8409     for (const auto *Cl : Clauses) {
8410       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8411       if (!C)
8412         continue;
8413       for (const auto L : C->component_lists()) {
8414         assert(!std::get<1>(L).empty() &&
8415                "Not expecting empty list of components!");
8416         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8417         if (!Processed.insert(VD).second)
8418           continue;
8419         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8420         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8421         // If the first component is a member expression, we have to look into
8422         // 'this', which maps to null in the map of map information. Otherwise
8423         // look directly for the information.
8424         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8425 
8426         // We potentially have map information for this declaration already.
8427         // Look for the first set of components that refer to it.
8428         if (It != Info.end()) {
8429           bool Found = false;
8430           for (auto &Data : It->second) {
8431             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8432               return MI.Components.back().getAssociatedDeclaration() == VD;
8433             });
8434             // If we found a map entry, signal that the pointer has to be
8435             // returned and move on to the next declaration.
8436             if (CI != Data.end()) {
8437               CI->ReturnDevicePointer = true;
8438               Found = true;
8439               break;
8440             }
8441           }
8442           if (Found)
8443             continue;
8444         }
8445 
8446         // We didn't find any match in our map information - generate a zero
8447         // size array section - if the pointer is a struct member we defer this
8448         // action until the whole struct has been processed.
8449         if (isa<MemberExpr>(IE)) {
8450           // Insert the pointer into Info to be processed by
8451           // generateInfoForComponentList. Because it is a member pointer
8452           // without a pointee, no entry will be generated for it, therefore
8453           // we need to generate one after the whole struct has been processed.
8454           // Nonetheless, generateInfoForComponentList must be called to take
8455           // the pointer into account for the calculation of the range of the
8456           // partial struct.
8457           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8458                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8459                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8460           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8461         } else {
8462           llvm::Value *Ptr;
8463           if (IE->isGLValue())
8464             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8465           else
8466             Ptr = CGF.EmitScalarExpr(IE);
8467           CombinedInfo.Exprs.push_back(VD);
8468           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8469           CombinedInfo.Pointers.push_back(Ptr);
8470           CombinedInfo.Sizes.push_back(
8471               llvm::Constant::getNullValue(CGF.Int64Ty));
8472           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8473           CombinedInfo.Mappers.push_back(nullptr);
8474         }
8475       }
8476     }
8477 
8478     for (const auto &Data : Info) {
8479       StructRangeInfoTy PartialStruct;
8480       // Temporary generated information.
8481       MapCombinedInfoTy CurInfo;
8482       const Decl *D = Data.first;
8483       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8484       for (const auto &M : Data.second) {
8485         for (const MapInfo &L : M) {
8486           assert(!L.Components.empty() &&
8487                  "Not expecting declaration with no component lists.");
8488 
8489           // Remember the current base pointer index.
8490           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8491           CurInfo.NonContigInfo.IsNonContiguous =
8492               L.Components.back().isNonContiguous();
8493           generateInfoForComponentList(
8494               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8495               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8496               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8497 
8498           // If this entry relates with a device pointer, set the relevant
8499           // declaration and add the 'return pointer' flag.
8500           if (L.ReturnDevicePointer) {
8501             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8502                    "Unexpected number of mapped base pointers.");
8503 
8504             const ValueDecl *RelevantVD =
8505                 L.Components.back().getAssociatedDeclaration();
8506             assert(RelevantVD &&
8507                    "No relevant declaration related with device pointer??");
8508 
8509             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8510                 RelevantVD);
8511             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8512           }
8513         }
8514       }
8515 
8516       // Append any pending zero-length pointers which are struct members and
8517       // used with use_device_ptr or use_device_addr.
8518       auto CI = DeferredInfo.find(Data.first);
8519       if (CI != DeferredInfo.end()) {
8520         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8521           llvm::Value *BasePtr;
8522           llvm::Value *Ptr;
8523           if (L.ForDeviceAddr) {
8524             if (L.IE->isGLValue())
8525               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8526             else
8527               Ptr = this->CGF.EmitScalarExpr(L.IE);
8528             BasePtr = Ptr;
8529             // Entry is RETURN_PARAM. Also, set the placeholder value
8530             // MEMBER_OF=FFFF so that the entry is later updated with the
8531             // correct value of MEMBER_OF.
8532             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8533           } else {
8534             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8535             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8536                                              L.IE->getExprLoc());
8537             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8538             // placeholder value MEMBER_OF=FFFF so that the entry is later
8539             // updated with the correct value of MEMBER_OF.
8540             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8541                                     OMP_MAP_MEMBER_OF);
8542           }
8543           CurInfo.Exprs.push_back(L.VD);
8544           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8545           CurInfo.Pointers.push_back(Ptr);
8546           CurInfo.Sizes.push_back(
8547               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8548           CurInfo.Mappers.push_back(nullptr);
8549         }
8550       }
8551       // If there is an entry in PartialStruct it means we have a struct with
8552       // individual members mapped. Emit an extra combined entry.
8553       if (PartialStruct.Base.isValid()) {
8554         CurInfo.NonContigInfo.Dims.push_back(0);
8555         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8556       }
8557 
8558       // We need to append the results of this capture to what we already
8559       // have.
8560       CombinedInfo.append(CurInfo);
8561     }
8562     // Append data for use_device_ptr clauses.
8563     CombinedInfo.append(UseDevicePtrCombinedInfo);
8564   }
8565 
8566 public:
8567   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8568       : CurDir(&Dir), CGF(CGF) {
8569     // Extract firstprivate clause information.
8570     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8571       for (const auto *D : C->varlists())
8572         FirstPrivateDecls.try_emplace(
8573             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8574     // Extract implicit firstprivates from uses_allocators clauses.
8575     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8576       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8577         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8578         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8579           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8580                                         /*Implicit=*/true);
8581         else if (const auto *VD = dyn_cast<VarDecl>(
8582                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8583                          ->getDecl()))
8584           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8585       }
8586     }
8587     // Extract device pointer clause information.
8588     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8589       for (auto L : C->component_lists())
8590         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8591   }
8592 
8593   /// Constructor for the declare mapper directive.
8594   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8595       : CurDir(&Dir), CGF(CGF) {}
8596 
8597   /// Generate code for the combined entry if we have a partially mapped struct
8598   /// and take care of the mapping flags of the arguments corresponding to
8599   /// individual struct members.
8600   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8601                          MapFlagsArrayTy &CurTypes,
8602                          const StructRangeInfoTy &PartialStruct,
8603                          const ValueDecl *VD = nullptr,
8604                          bool NotTargetParams = true) const {
8605     if (CurTypes.size() == 1 &&
8606         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8607         !PartialStruct.IsArraySection)
8608       return;
8609     Address LBAddr = PartialStruct.LowestElem.second;
8610     Address HBAddr = PartialStruct.HighestElem.second;
8611     if (PartialStruct.HasCompleteRecord) {
8612       LBAddr = PartialStruct.Base;
8613       HBAddr = PartialStruct.Base;
8614     }
8615     CombinedInfo.Exprs.push_back(VD);
8616     // Base is the base of the struct
8617     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8618     // Pointer is the address of the lowest element
8619     llvm::Value *LB = LBAddr.getPointer();
8620     CombinedInfo.Pointers.push_back(LB);
8621     // There should not be a mapper for a combined entry.
8622     CombinedInfo.Mappers.push_back(nullptr);
8623     // Size is (addr of {highest+1} element) - (addr of lowest element)
8624     llvm::Value *HB = HBAddr.getPointer();
8625     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8626     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8627     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8628     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8629     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8630                                                   /*isSigned=*/false);
8631     CombinedInfo.Sizes.push_back(Size);
8632     // Map type is always TARGET_PARAM, if generate info for captures.
8633     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8634                                                  : OMP_MAP_TARGET_PARAM);
8635     // If any element has the present modifier, then make sure the runtime
8636     // doesn't attempt to allocate the struct.
8637     if (CurTypes.end() !=
8638         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8639           return Type & OMP_MAP_PRESENT;
8640         }))
8641       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8642     // Remove TARGET_PARAM flag from the first element
8643     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8644 
8645     // All other current entries will be MEMBER_OF the combined entry
8646     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8647     // 0xFFFF in the MEMBER_OF field).
8648     OpenMPOffloadMappingFlags MemberOfFlag =
8649         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8650     for (auto &M : CurTypes)
8651       setCorrectMemberOfFlag(M, MemberOfFlag);
8652   }
8653 
8654   /// Generate all the base pointers, section pointers, sizes, map types, and
8655   /// mappers for the extracted mappable expressions (all included in \a
8656   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8657   /// pair of the relevant declaration and index where it occurs is appended to
8658   /// the device pointers info array.
8659   void generateAllInfo(
8660       MapCombinedInfoTy &CombinedInfo,
8661       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8662           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8663     assert(CurDir.is<const OMPExecutableDirective *>() &&
8664            "Expect a executable directive");
8665     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8666     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8667   }
8668 
8669   /// Generate all the base pointers, section pointers, sizes, map types, and
8670   /// mappers for the extracted map clauses of user-defined mapper (all included
8671   /// in \a CombinedInfo).
8672   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8673     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8674            "Expect a declare mapper directive");
8675     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8676     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8677   }
8678 
8679   /// Emit capture info for lambdas for variables captured by reference.
8680   void generateInfoForLambdaCaptures(
8681       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8682       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8683     const auto *RD = VD->getType()
8684                          .getCanonicalType()
8685                          .getNonReferenceType()
8686                          ->getAsCXXRecordDecl();
8687     if (!RD || !RD->isLambda())
8688       return;
8689     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8690     LValue VDLVal = CGF.MakeAddrLValue(
8691         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8692     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8693     FieldDecl *ThisCapture = nullptr;
8694     RD->getCaptureFields(Captures, ThisCapture);
8695     if (ThisCapture) {
8696       LValue ThisLVal =
8697           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8698       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8699       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8700                                  VDLVal.getPointer(CGF));
8701       CombinedInfo.Exprs.push_back(VD);
8702       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8703       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8704       CombinedInfo.Sizes.push_back(
8705           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8706                                     CGF.Int64Ty, /*isSigned=*/true));
8707       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8708                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8709       CombinedInfo.Mappers.push_back(nullptr);
8710     }
8711     for (const LambdaCapture &LC : RD->captures()) {
8712       if (!LC.capturesVariable())
8713         continue;
8714       const VarDecl *VD = LC.getCapturedVar();
8715       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8716         continue;
8717       auto It = Captures.find(VD);
8718       assert(It != Captures.end() && "Found lambda capture without field.");
8719       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8720       if (LC.getCaptureKind() == LCK_ByRef) {
8721         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8722         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8723                                    VDLVal.getPointer(CGF));
8724         CombinedInfo.Exprs.push_back(VD);
8725         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8726         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8727         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8728             CGF.getTypeSize(
8729                 VD->getType().getCanonicalType().getNonReferenceType()),
8730             CGF.Int64Ty, /*isSigned=*/true));
8731       } else {
8732         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8733         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8734                                    VDLVal.getPointer(CGF));
8735         CombinedInfo.Exprs.push_back(VD);
8736         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8737         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8738         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8739       }
8740       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8741                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8742       CombinedInfo.Mappers.push_back(nullptr);
8743     }
8744   }
8745 
8746   /// Set correct indices for lambdas captures.
8747   void adjustMemberOfForLambdaCaptures(
8748       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8749       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8750       MapFlagsArrayTy &Types) const {
8751     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8752       // Set correct member_of idx for all implicit lambda captures.
8753       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8754                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8755         continue;
8756       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8757       assert(BasePtr && "Unable to find base lambda address.");
8758       int TgtIdx = -1;
8759       for (unsigned J = I; J > 0; --J) {
8760         unsigned Idx = J - 1;
8761         if (Pointers[Idx] != BasePtr)
8762           continue;
8763         TgtIdx = Idx;
8764         break;
8765       }
8766       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8767       // All other current entries will be MEMBER_OF the combined entry
8768       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8769       // 0xFFFF in the MEMBER_OF field).
8770       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8771       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8772     }
8773   }
8774 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// The work is done in these steps:
  ///  1. If the captured declaration appears in an is_device_ptr clause, emit
  ///     one pointer-sized entry that passes \p Arg through and return.
  ///  2. Otherwise collect every component list that the map clauses of the
  ///     current executable directive associate with the declaration.
  ///  3. Reorder those lists (see the comparator below).
  ///  4. Detect pairs of lists where one is a prefix of the other
  ///     ("overlapped" data) and sort the overlapped lists of each base.
  ///  5. Emit info for the lists that have overlapped elements first, then
  ///     for the remaining ones.
  ///
  /// \param Cap The capture to process; must not be a variable array type
  /// capture.
  /// \param Arg Value passed for the capture; used directly for device
  /// pointers.
  /// \param CombinedInfo Receives the generated entries.
  /// \param PartialStruct Forwarded to generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The declaration this capture refers to: null for a captured 'this',
    // otherwise the canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list: (components, map type, map type
    // modifiers, is-implicit flag, mapper, variable reference expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // NOTE(review): EI starts at the clause's first variable reference and
      // advances once per component list found for VD; this assumes the i-th
      // list returned for VD corresponds to the i-th variable reference of
      // the clause - TODO confirm.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Reorder the lists. LHS is placed before RHS when LHS has the 'present'
    // modifier and RHS does not, or when RHS is an 'alloc' map and LHS is not.
    // NOTE(review): HasAllocs is computed from RHS and HasAllocsR from LHS,
    // i.e. the alloc test is deliberately or accidentally mirrored relative to
    // the 'present' test; the net effect is that 'alloc' entries sort last.
    // Confirm this is intended and that the predicate is a valid ordering for
    // entries that are both 'present' and 'alloc'.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base record inside
    // DeclComponentLists, so that vector must not be modified from here on.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Compare L only against the lists that follow it, so every unordered
      // pair is examined once.
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is used below; the other variables are merely
        // overwritten by the tie.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from their last component backwards while the
        // components have the same statement class and declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points at the first component of the longer list that has no
          // counterpart in the shorter one.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  .getNonReferenceType()
                  ->isPointerType())
            continue;
          // The fully consumed (shorter) list is the base; the other list is
          // recorded as one of its overlapped sub-elements.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the record's fields; it is used to order fields that
    // belong to different parent records.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // NOTE(review): VD is null when the capture is 'this'; this dereference
      // assumes overlapped data is never produced in that case - confirm.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common leading part of both lists (walking from the
            // last component backwards).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two different fields: order by field index
            // within the same parent, otherwise by which field appears first
            // in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared after every list, including those skipped above because they
      // were already emitted with their overlapped elements.
      IsFirstComponentList = false;
    }
  }
9001 
9002   /// Generate the default map information for a given capture \a CI,
9003   /// record field declaration \a RI and captured value \a CV.
9004   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9005                               const FieldDecl &RI, llvm::Value *CV,
9006                               MapCombinedInfoTy &CombinedInfo) const {
9007     bool IsImplicit = true;
9008     // Do the default mapping.
9009     if (CI.capturesThis()) {
9010       CombinedInfo.Exprs.push_back(nullptr);
9011       CombinedInfo.BasePointers.push_back(CV);
9012       CombinedInfo.Pointers.push_back(CV);
9013       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9014       CombinedInfo.Sizes.push_back(
9015           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9016                                     CGF.Int64Ty, /*isSigned=*/true));
9017       // Default map type.
9018       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9019     } else if (CI.capturesVariableByCopy()) {
9020       const VarDecl *VD = CI.getCapturedVar();
9021       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9022       CombinedInfo.BasePointers.push_back(CV);
9023       CombinedInfo.Pointers.push_back(CV);
9024       if (!RI.getType()->isAnyPointerType()) {
9025         // We have to signal to the runtime captures passed by value that are
9026         // not pointers.
9027         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9028         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9029             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9030       } else {
9031         // Pointers are implicitly mapped with a zero size and no flags
9032         // (other than first map that is added for all implicit maps).
9033         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9034         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9035       }
9036       auto I = FirstPrivateDecls.find(VD);
9037       if (I != FirstPrivateDecls.end())
9038         IsImplicit = I->getSecond();
9039     } else {
9040       assert(CI.capturesVariable() && "Expected captured reference.");
9041       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9042       QualType ElementType = PtrTy->getPointeeType();
9043       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9044           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9045       // The default map type for a scalar/complex type is 'to' because by
9046       // default the value doesn't have to be retrieved. For an aggregate
9047       // type, the default is 'tofrom'.
9048       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9049       const VarDecl *VD = CI.getCapturedVar();
9050       auto I = FirstPrivateDecls.find(VD);
9051       if (I != FirstPrivateDecls.end() &&
9052           VD->getType().isConstant(CGF.getContext())) {
9053         llvm::Constant *Addr =
9054             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9055         // Copy the value of the original variable to the new global copy.
9056         CGF.Builder.CreateMemCpy(
9057             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9058             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9059             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9060         // Use new global variable as the base pointers.
9061         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9062         CombinedInfo.BasePointers.push_back(Addr);
9063         CombinedInfo.Pointers.push_back(Addr);
9064       } else {
9065         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9066         CombinedInfo.BasePointers.push_back(CV);
9067         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9068           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9069               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9070               AlignmentSource::Decl));
9071           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9072         } else {
9073           CombinedInfo.Pointers.push_back(CV);
9074         }
9075       }
9076       if (I != FirstPrivateDecls.end())
9077         IsImplicit = I->getSecond();
9078     }
9079     // Every default map produces a single argument which is a target parameter.
9080     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9081 
9082     // Add flag stating this is an implicit map.
9083     if (IsImplicit)
9084       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9085 
9086     // No user-defined mapper for default mapping.
9087     CombinedInfo.Mappers.push_back(nullptr);
9088   }
9089 };
9090 } // anonymous namespace
9091 
9092 static void emitNonContiguousDescriptor(
9093     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9094     CGOpenMPRuntime::TargetDataInfo &Info) {
9095   CodeGenModule &CGM = CGF.CGM;
9096   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9097       &NonContigInfo = CombinedInfo.NonContigInfo;
9098 
9099   // Build an array of struct descriptor_dim and then assign it to
9100   // offload_args.
9101   //
9102   // struct descriptor_dim {
9103   //  uint64_t offset;
9104   //  uint64_t count;
9105   //  uint64_t stride
9106   // };
9107   ASTContext &C = CGF.getContext();
9108   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9109   RecordDecl *RD;
9110   RD = C.buildImplicitRecord("descriptor_dim");
9111   RD->startDefinition();
9112   addFieldToRecordDecl(C, RD, Int64Ty);
9113   addFieldToRecordDecl(C, RD, Int64Ty);
9114   addFieldToRecordDecl(C, RD, Int64Ty);
9115   RD->completeDefinition();
9116   QualType DimTy = C.getRecordType(RD);
9117 
9118   enum { OffsetFD = 0, CountFD, StrideFD };
9119   // We need two index variable here since the size of "Dims" is the same as the
9120   // size of Components, however, the size of offset, count, and stride is equal
9121   // to the size of base declaration that is non-contiguous.
9122   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9123     // Skip emitting ir if dimension size is 1 since it cannot be
9124     // non-contiguous.
9125     if (NonContigInfo.Dims[I] == 1)
9126       continue;
9127     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9128     QualType ArrayTy =
9129         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9130     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9131     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9132       unsigned RevIdx = EE - II - 1;
9133       LValue DimsLVal = CGF.MakeAddrLValue(
9134           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9135       // Offset
9136       LValue OffsetLVal = CGF.EmitLValueForField(
9137           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9138       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9139       // Count
9140       LValue CountLVal = CGF.EmitLValueForField(
9141           DimsLVal, *std::next(RD->field_begin(), CountFD));
9142       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9143       // Stride
9144       LValue StrideLVal = CGF.EmitLValueForField(
9145           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9146       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9147     }
9148     // args[I] = &dims
9149     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9150         DimsAddr, CGM.Int8PtrTy);
9151     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9152         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9153         Info.PointersArray, 0, I);
9154     Address PAddr(P, CGF.getPointerAlign());
9155     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9156     ++L;
9157   }
9158 }
9159 
9160 /// Emit a string constant containing the names of the values mapped to the
9161 /// offloading runtime library.
9162 llvm::Constant *
9163 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9164                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9165   llvm::Constant *SrcLocStr;
9166   if (!MapExprs.getMapDecl()) {
9167     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9168   } else {
9169     std::string ExprName = "";
9170     if (MapExprs.getMapExpr()) {
9171       PrintingPolicy P(CGF.getContext().getLangOpts());
9172       llvm::raw_string_ostream OS(ExprName);
9173       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9174       OS.flush();
9175     } else {
9176       ExprName = MapExprs.getMapDecl()->getNameAsString();
9177     }
9178 
9179     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9180     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9181     const char *FileName = PLoc.getFilename();
9182     unsigned Line = PLoc.getLine();
9183     unsigned Column = PLoc.getColumn();
9184     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9185                                                 Line, Column);
9186   }
9187 
9188   return SrcLocStr;
9189 }
9190 
9191 /// Emit the arrays used to pass the captures and map information to the
9192 /// offloading runtime library. If there is no map or capture information,
9193 /// return nullptr by reference.
9194 static void emitOffloadingArrays(
9195     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9196     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9197     bool IsNonContiguous = false) {
9198   CodeGenModule &CGM = CGF.CGM;
9199   ASTContext &Ctx = CGF.getContext();
9200 
9201   // Reset the array information.
9202   Info.clearArrayInfo();
9203   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9204 
9205   if (Info.NumberOfPtrs) {
9206     // Detect if we have any capture size requiring runtime evaluation of the
9207     // size so that a constant array could be eventually used.
9208     bool hasRuntimeEvaluationCaptureSize = false;
9209     for (llvm::Value *S : CombinedInfo.Sizes)
9210       if (!isa<llvm::Constant>(S)) {
9211         hasRuntimeEvaluationCaptureSize = true;
9212         break;
9213       }
9214 
9215     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9216     QualType PointerArrayType = Ctx.getConstantArrayType(
9217         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9218         /*IndexTypeQuals=*/0);
9219 
9220     Info.BasePointersArray =
9221         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9222     Info.PointersArray =
9223         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9224     Address MappersArray =
9225         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9226     Info.MappersArray = MappersArray.getPointer();
9227 
9228     // If we don't have any VLA types or other types that require runtime
9229     // evaluation, we can use a constant array for the map sizes, otherwise we
9230     // need to fill up the arrays as we do for the pointers.
9231     QualType Int64Ty =
9232         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9233     if (hasRuntimeEvaluationCaptureSize) {
9234       QualType SizeArrayType = Ctx.getConstantArrayType(
9235           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9236           /*IndexTypeQuals=*/0);
9237       Info.SizesArray =
9238           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9239     } else {
9240       // We expect all the sizes to be constant, so we collect them to create
9241       // a constant array.
9242       SmallVector<llvm::Constant *, 16> ConstSizes;
9243       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9244         if (IsNonContiguous &&
9245             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9246           ConstSizes.push_back(llvm::ConstantInt::get(
9247               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9248         } else {
9249           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9250         }
9251       }
9252 
9253       auto *SizesArrayInit = llvm::ConstantArray::get(
9254           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9255       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9256       auto *SizesArrayGbl = new llvm::GlobalVariable(
9257           CGM.getModule(), SizesArrayInit->getType(),
9258           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9259           SizesArrayInit, Name);
9260       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9261       Info.SizesArray = SizesArrayGbl;
9262     }
9263 
9264     // The map types are always constant so we don't need to generate code to
9265     // fill arrays. Instead, we create an array constant.
9266     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9267     llvm::copy(CombinedInfo.Types, Mapping.begin());
9268     llvm::Constant *MapTypesArrayInit =
9269         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9270     std::string MaptypesName =
9271         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9272     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9273         CGM.getModule(), MapTypesArrayInit->getType(),
9274         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9275         MapTypesArrayInit, MaptypesName);
9276     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9277     Info.MapTypesArray = MapTypesArrayGbl;
9278 
9279     // The information types are only built if there is debug information
9280     // requested.
9281     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9282       Info.MapNamesArray = llvm::Constant::getNullValue(
9283           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9284     } else {
9285       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9286         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9287       };
9288       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9289       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9290 
9291       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9292           llvm::ArrayType::get(
9293               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9294               CombinedInfo.Exprs.size()),
9295           InfoMap);
9296       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9297           CGM.getModule(), MapNamesArrayInit->getType(),
9298           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9299           MapNamesArrayInit,
9300           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9301       Info.MapNamesArray = MapNamesArrayGbl;
9302     }
9303 
9304     // If there's a present map type modifier, it must not be applied to the end
9305     // of a region, so generate a separate map type array in that case.
9306     if (Info.separateBeginEndCalls()) {
9307       bool EndMapTypesDiffer = false;
9308       for (uint64_t &Type : Mapping) {
9309         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9310           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9311           EndMapTypesDiffer = true;
9312         }
9313       }
9314       if (EndMapTypesDiffer) {
9315         MapTypesArrayInit =
9316             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9317         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9318         MapTypesArrayGbl = new llvm::GlobalVariable(
9319             CGM.getModule(), MapTypesArrayInit->getType(),
9320             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9321             MapTypesArrayInit, MaptypesName);
9322         MapTypesArrayGbl->setUnnamedAddr(
9323             llvm::GlobalValue::UnnamedAddr::Global);
9324         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9325       }
9326     }
9327 
9328     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9329       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9330       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9331           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9332           Info.BasePointersArray, 0, I);
9333       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9334           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9335       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9336       CGF.Builder.CreateStore(BPVal, BPAddr);
9337 
9338       if (Info.requiresDevicePointerInfo())
9339         if (const ValueDecl *DevVD =
9340                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9341           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9342 
9343       llvm::Value *PVal = CombinedInfo.Pointers[I];
9344       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9345           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9346           Info.PointersArray, 0, I);
9347       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9348           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9349       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9350       CGF.Builder.CreateStore(PVal, PAddr);
9351 
9352       if (hasRuntimeEvaluationCaptureSize) {
9353         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9354             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9355             Info.SizesArray,
9356             /*Idx0=*/0,
9357             /*Idx1=*/I);
9358         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9359         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9360                                                           CGM.Int64Ty,
9361                                                           /*isSigned=*/true),
9362                                 SAddr);
9363       }
9364 
9365       // Fill up the mapper array.
9366       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9367       if (CombinedInfo.Mappers[I]) {
9368         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9369             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9370         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9371         Info.HasMapper = true;
9372       }
9373       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9374       CGF.Builder.CreateStore(MFunc, MAddr);
9375     }
9376   }
9377 
9378   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9379       Info.NumberOfPtrs == 0)
9380     return;
9381 
9382   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9383 }
9384 
namespace {
/// Optional settings consumed by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When true, the map-types argument is taken from the array prepared for
  /// the end of the region (if there is one) instead of the one used at the
  /// beginning of the region.
  bool ForEndCall;

  /// Deliberately non-explicit so that a plain bool converts to the options
  /// struct. Without an argument the region-begin behaviour is selected.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9393 
9394 /// Emit the arguments to be passed to the runtime library based on the
9395 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9396 /// ForEndCall, emit map types to be passed for the end of the region instead of
9397 /// the beginning.
9398 static void emitOffloadingArraysArgument(
9399     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9400     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9401     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9402     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9403     const ArgumentsOptions &Options = ArgumentsOptions()) {
9404   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9405          "expected region end call to runtime only when end call is separate");
9406   CodeGenModule &CGM = CGF.CGM;
9407   if (Info.NumberOfPtrs) {
9408     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9409         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9410         Info.BasePointersArray,
9411         /*Idx0=*/0, /*Idx1=*/0);
9412     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9413         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9414         Info.PointersArray,
9415         /*Idx0=*/0,
9416         /*Idx1=*/0);
9417     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9418         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9419         /*Idx0=*/0, /*Idx1=*/0);
9420     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9421         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9422         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9423                                                     : Info.MapTypesArray,
9424         /*Idx0=*/0,
9425         /*Idx1=*/0);
9426 
9427     // Only emit the mapper information arrays if debug information is
9428     // requested.
9429     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9430       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9431     else
9432       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9433           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9434           Info.MapNamesArray,
9435           /*Idx0=*/0,
9436           /*Idx1=*/0);
9437     // If there is no user-defined mapper, set the mapper array to nullptr to
9438     // avoid an unnecessary data privatization
9439     if (!Info.HasMapper)
9440       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9441     else
9442       MappersArrayArg =
9443           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9444   } else {
9445     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9446     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9447     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9448     MapTypesArrayArg =
9449         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9450     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9451     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9452   }
9453 }
9454 
9455 /// Check for inner distribute directive.
9456 static const OMPExecutableDirective *
9457 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9458   const auto *CS = D.getInnermostCapturedStmt();
9459   const auto *Body =
9460       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9461   const Stmt *ChildStmt =
9462       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9463 
9464   if (const auto *NestedDir =
9465           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9466     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9467     switch (D.getDirectiveKind()) {
9468     case OMPD_target:
9469       if (isOpenMPDistributeDirective(DKind))
9470         return NestedDir;
9471       if (DKind == OMPD_teams) {
9472         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9473             /*IgnoreCaptured=*/true);
9474         if (!Body)
9475           return nullptr;
9476         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9477         if (const auto *NND =
9478                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9479           DKind = NND->getDirectiveKind();
9480           if (isOpenMPDistributeDirective(DKind))
9481             return NND;
9482         }
9483       }
9484       return nullptr;
9485     case OMPD_target_teams:
9486       if (isOpenMPDistributeDirective(DKind))
9487         return NestedDir;
9488       return nullptr;
9489     case OMPD_target_parallel:
9490     case OMPD_target_simd:
9491     case OMPD_target_parallel_for:
9492     case OMPD_target_parallel_for_simd:
9493       return nullptr;
9494     case OMPD_target_teams_distribute:
9495     case OMPD_target_teams_distribute_simd:
9496     case OMPD_target_teams_distribute_parallel_for:
9497     case OMPD_target_teams_distribute_parallel_for_simd:
9498     case OMPD_parallel:
9499     case OMPD_for:
9500     case OMPD_parallel_for:
9501     case OMPD_parallel_master:
9502     case OMPD_parallel_sections:
9503     case OMPD_for_simd:
9504     case OMPD_parallel_for_simd:
9505     case OMPD_cancel:
9506     case OMPD_cancellation_point:
9507     case OMPD_ordered:
9508     case OMPD_threadprivate:
9509     case OMPD_allocate:
9510     case OMPD_task:
9511     case OMPD_simd:
9512     case OMPD_tile:
9513     case OMPD_sections:
9514     case OMPD_section:
9515     case OMPD_single:
9516     case OMPD_master:
9517     case OMPD_critical:
9518     case OMPD_taskyield:
9519     case OMPD_barrier:
9520     case OMPD_taskwait:
9521     case OMPD_taskgroup:
9522     case OMPD_atomic:
9523     case OMPD_flush:
9524     case OMPD_depobj:
9525     case OMPD_scan:
9526     case OMPD_teams:
9527     case OMPD_target_data:
9528     case OMPD_target_exit_data:
9529     case OMPD_target_enter_data:
9530     case OMPD_distribute:
9531     case OMPD_distribute_simd:
9532     case OMPD_distribute_parallel_for:
9533     case OMPD_distribute_parallel_for_simd:
9534     case OMPD_teams_distribute:
9535     case OMPD_teams_distribute_simd:
9536     case OMPD_teams_distribute_parallel_for:
9537     case OMPD_teams_distribute_parallel_for_simd:
9538     case OMPD_target_update:
9539     case OMPD_declare_simd:
9540     case OMPD_declare_variant:
9541     case OMPD_begin_declare_variant:
9542     case OMPD_end_declare_variant:
9543     case OMPD_declare_target:
9544     case OMPD_end_declare_target:
9545     case OMPD_declare_reduction:
9546     case OMPD_declare_mapper:
9547     case OMPD_taskloop:
9548     case OMPD_taskloop_simd:
9549     case OMPD_master_taskloop:
9550     case OMPD_master_taskloop_simd:
9551     case OMPD_parallel_master_taskloop:
9552     case OMPD_parallel_master_taskloop_simd:
9553     case OMPD_requires:
9554     case OMPD_unknown:
9555     default:
9556       llvm_unreachable("Unexpected directive.");
9557     }
9558   }
9559 
9560   return nullptr;
9561 }
9562 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first.
///   if ((size > 1 || base != begin) && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if ((size > 1 || base != begin) && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// \param D   The 'declare mapper' declaration to emit a function for. If
///            UDMMap already has an entry for it, nothing is emitted.
/// \param CGF If non-null, \p D is additionally appended to the
///            FunctionUDMMap entry keyed by CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared by the mapper; it is privatized below so that it
  // refers to the array element currently being mapped.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // Parameter order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A dedicated CodeGenFunction is used for the mapper body; the CGF
  // parameter is only consulted at the very end of this function.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): the QualType passed to EmitLoadOfScalar here (and for the
  // other parameter loads below) is a pointer to the parameter's declared type
  // rather than the declared type itself - confirm this is intended.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  // The division is emitted as an exact unsigned division by the size of the
  // mapped type.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the element range iterated by the loop below.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is handed over as the block to branch to when the initiation is
  // skipped.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range skips the loop and the deletion step and goes straight to
  // the function exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // The block holding the branch above; it provides the initial incoming value
  // of the loop PHI node created in the body block.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block from which the back edge to BodyBB is taken. It is
  // BodyBB itself unless the component loop below emits further blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count left by getFlagMemberOffset() so that it can be
  // added onto the map type of every component below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name argument is null unless debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    // The shifted number of pre-existing components is added to the
    // component's static map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    // The decision is emitted as a chain of tests (alloc, then to, then from)
    // whose results are merged by a PHI node in EndBB.
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // No explicit branch out of FromBB is created here; presumably EmitBlock
    // links the still-open block to EndBB - confirm against EmitBlock.
    MapperCGF.EmitBlock(EndBB);
    // The back edge of the element loop now has to originate from EndBB.
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // Reached directly from ToElseBB when none of the three tests matched
    // (the tofrom case): the member map type is used unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this array shadows the single-element OffloadingArgs declared
    // before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the advanced pointer flows back into the PHI node from the
  // last block emitted for the body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the finished function so that later requests for this mapper find
  // it in UDMMap.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Also remember that this mapper was requested from within CGF's current
    // function.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9836 
9837 /// Emit the array initialization or deletion portion for user-defined mapper
9838 /// code generation. First, it evaluates whether an array section is mapped and
9839 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9840 /// true, and \a MapType indicates to not delete this array, array
9841 /// initialization code is generated. If \a IsInit is false, and \a MapType
9842 /// indicates to not this array, array deletion code is generated.
9843 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9844     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9845     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9846     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9847   StringRef Prefix = IsInit ? ".init" : ".del";
9848 
9849   // Evaluate if this is an array section.
9850   llvm::BasicBlock *BodyBB =
9851       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9852   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9853       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9854   // base != begin?
9855   llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9856       MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9857   llvm::Value *Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9858   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9859       MapType,
9860       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9861   llvm::Value *DeleteCond;
9862   if (IsInit) {
9863     DeleteCond = MapperCGF.Builder.CreateIsNull(
9864         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9865   } else {
9866     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9867         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9868   }
9869   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9870   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9871 
9872   MapperCGF.EmitBlock(BodyBB);
9873   // Get the array size by multiplying element size and element number (i.e., \p
9874   // Size).
9875   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9876       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9877   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9878   // memory allocation/deletion purpose only.
9879   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9880       MapType,
9881       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9882                                    MappableExprsHandler::OMP_MAP_FROM)));
9883   llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9884 
9885   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9886   // data structure.
9887   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9888                                    ArraySize, MapTypeArg, MapNameArg};
9889   MapperCGF.EmitRuntimeCall(
9890       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9891                                             OMPRTL___tgt_push_mapper_component),
9892       OffloadingArgs);
9893 }
9894 
9895 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9896     const OMPDeclareMapperDecl *D) {
9897   auto I = UDMMap.find(D);
9898   if (I != UDMMap.end())
9899     return I->second;
9900   emitUserDefinedMapper(D);
9901   return UDMMap.lookup(D);
9902 }
9903 
9904 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9905     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9906     llvm::Value *DeviceID,
9907     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9908                                      const OMPLoopDirective &D)>
9909         SizeEmitter) {
9910   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9911   const OMPExecutableDirective *TD = &D;
9912   // Get nested teams distribute kind directive, if any.
9913   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9914     TD = getNestedDistributeDirective(CGM.getContext(), D);
9915   if (!TD)
9916     return;
9917   const auto *LD = cast<OMPLoopDirective>(TD);
9918   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9919                                                          PrePostActionTy &) {
9920     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9921       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9922       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9923       CGF.EmitRuntimeCall(
9924           OMPBuilder.getOrCreateRuntimeFunction(
9925               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9926           Args);
9927     }
9928   };
9929   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9930 }
9931 
/// Emit the host-side code that executes the target region of directive \p D.
///
/// When \p OutlinedFnID is non-null the emitted code fills the offloading
/// arrays, calls one of the __tgt_target*_mapper runtime entry points and, if
/// that call returns a non-zero value, falls back to calling \p OutlinedFn on
/// the host. When \p OutlinedFnID is null only the host call is emitted.
///
/// \param CGF Function into which the code is emitted; nothing is emitted if
/// it has no insertion point.
/// \param D The target directive being lowered.
/// \param OutlinedFn Host version of the target region; must be non-null.
/// \param OutlinedFnID Value passed to the runtime to identify the target
/// region, or null when no offloading code should be emitted.
/// \param IfCond Condition of the 'if' clause, or null. When present (and
/// \p OutlinedFnID is non-null) it is handed to emitIfClause together with
/// the offloading and the host-only code generators.
/// \param Device Expression and modifier of the 'device' clause. A null
/// expression selects OMP_DEVICEID_UNDEF; the 'ancestor' modifier makes the
/// region execute on the host.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall to
/// emit the trip count of an associated loop.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  // Nothing can be emitted without an insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // 'depend' and 'nowait' clauses route the region through
  // EmitOMPTargetTaskBasedDirective below instead of an inlined directive.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  // Values of the variables captured by the 'target' region; they are the
  // arguments of the host call to OutlinedFn.
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // State shared between TargetThenGen (which fills it) and ThenGen (which
  // reads it when building the runtime call arguments).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // NOTE(review): the captured values are regenerated here, presumably
        // because the task-based path emits this code in a different function
        // than the one ArgsCodegen ran in - confirm.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      // The device id is passed to the runtime as a signed 64-bit integer.
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait_mapper runtime entry points.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      // Same arguments as the non-teams call below, followed by the number of
      // teams and the thread limit.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // Any non-zero return value of the runtime call is treated as a failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    // Failure path: run the host version of the region, then rejoin.
    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect the map information of every capture, emit the
  // offloading arrays, publish them through InputInfo / MapTypesArray /
  // MapNamesArray, and then run ThenGen (inside a task when required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a captured 'this' is recorded as a
        // null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                                    nullptr, /*NoTargetParam=*/false);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays to the state captured by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, inside a task when required. No offloading
  // arrays exist here, so a default-constructed InputInfo is passed.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10238 
10239 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10240                                                     StringRef ParentName) {
10241   if (!S)
10242     return;
10243 
10244   // Codegen OMP target directives that offload compute to the device.
10245   bool RequiresDeviceCodegen =
10246       isa<OMPExecutableDirective>(S) &&
10247       isOpenMPTargetExecutionDirective(
10248           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10249 
10250   if (RequiresDeviceCodegen) {
10251     const auto &E = *cast<OMPExecutableDirective>(S);
10252     unsigned DeviceID;
10253     unsigned FileID;
10254     unsigned Line;
10255     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10256                              FileID, Line);
10257 
10258     // Is this a target region that should not be emitted as an entry point? If
10259     // so just signal we are done with this target region.
10260     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10261                                                             ParentName, Line))
10262       return;
10263 
10264     switch (E.getDirectiveKind()) {
10265     case OMPD_target:
10266       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10267                                                    cast<OMPTargetDirective>(E));
10268       break;
10269     case OMPD_target_parallel:
10270       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10271           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10272       break;
10273     case OMPD_target_teams:
10274       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10275           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10276       break;
10277     case OMPD_target_teams_distribute:
10278       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10279           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10280       break;
10281     case OMPD_target_teams_distribute_simd:
10282       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10283           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10284       break;
10285     case OMPD_target_parallel_for:
10286       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10287           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10288       break;
10289     case OMPD_target_parallel_for_simd:
10290       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10291           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10292       break;
10293     case OMPD_target_simd:
10294       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10295           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10296       break;
10297     case OMPD_target_teams_distribute_parallel_for:
10298       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10299           CGM, ParentName,
10300           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10301       break;
10302     case OMPD_target_teams_distribute_parallel_for_simd:
10303       CodeGenFunction::
10304           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10305               CGM, ParentName,
10306               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10307       break;
10308     case OMPD_parallel:
10309     case OMPD_for:
10310     case OMPD_parallel_for:
10311     case OMPD_parallel_master:
10312     case OMPD_parallel_sections:
10313     case OMPD_for_simd:
10314     case OMPD_parallel_for_simd:
10315     case OMPD_cancel:
10316     case OMPD_cancellation_point:
10317     case OMPD_ordered:
10318     case OMPD_threadprivate:
10319     case OMPD_allocate:
10320     case OMPD_task:
10321     case OMPD_simd:
10322     case OMPD_tile:
10323     case OMPD_sections:
10324     case OMPD_section:
10325     case OMPD_single:
10326     case OMPD_master:
10327     case OMPD_critical:
10328     case OMPD_taskyield:
10329     case OMPD_barrier:
10330     case OMPD_taskwait:
10331     case OMPD_taskgroup:
10332     case OMPD_atomic:
10333     case OMPD_flush:
10334     case OMPD_depobj:
10335     case OMPD_scan:
10336     case OMPD_teams:
10337     case OMPD_target_data:
10338     case OMPD_target_exit_data:
10339     case OMPD_target_enter_data:
10340     case OMPD_distribute:
10341     case OMPD_distribute_simd:
10342     case OMPD_distribute_parallel_for:
10343     case OMPD_distribute_parallel_for_simd:
10344     case OMPD_teams_distribute:
10345     case OMPD_teams_distribute_simd:
10346     case OMPD_teams_distribute_parallel_for:
10347     case OMPD_teams_distribute_parallel_for_simd:
10348     case OMPD_target_update:
10349     case OMPD_declare_simd:
10350     case OMPD_declare_variant:
10351     case OMPD_begin_declare_variant:
10352     case OMPD_end_declare_variant:
10353     case OMPD_declare_target:
10354     case OMPD_end_declare_target:
10355     case OMPD_declare_reduction:
10356     case OMPD_declare_mapper:
10357     case OMPD_taskloop:
10358     case OMPD_taskloop_simd:
10359     case OMPD_master_taskloop:
10360     case OMPD_master_taskloop_simd:
10361     case OMPD_parallel_master_taskloop:
10362     case OMPD_parallel_master_taskloop_simd:
10363     case OMPD_requires:
10364     case OMPD_unknown:
10365     default:
10366       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10367     }
10368     return;
10369   }
10370 
10371   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10372     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10373       return;
10374 
10375     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10376     return;
10377   }
10378 
10379   // If this is a lambda function, look into its body.
10380   if (const auto *L = dyn_cast<LambdaExpr>(S))
10381     S = L->getBody();
10382 
10383   // Keep looking for target regions recursively.
10384   for (const Stmt *II : S->children())
10385     scanForTargetRegionsFunctions(II, ParentName);
10386 }
10387 
10388 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10389   // If emitting code for the host, we do not process FD here. Instead we do
10390   // the normal code generation.
10391   if (!CGM.getLangOpts().OpenMPIsDevice) {
10392     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10393       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10394           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10395       // Do not emit device_type(nohost) functions for the host.
10396       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10397         return true;
10398     }
10399     return false;
10400   }
10401 
10402   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10403   // Try to detect target regions in the function.
10404   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10405     StringRef Name = CGM.getMangledName(GD);
10406     scanForTargetRegionsFunctions(FD->getBody(), Name);
10407     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10408         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10409     // Do not emit device_type(nohost) functions for the host.
10410     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10411       return true;
10412   }
10413 
10414   // Do not to emit function if it is not marked as declare target.
10415   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10416          AlreadyEmittedTargetDecls.count(VD) == 0;
10417 }
10418 
10419 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10420   if (!CGM.getLangOpts().OpenMPIsDevice)
10421     return false;
10422 
10423   // Check if there are Ctors/Dtors in this declaration and look for target
10424   // regions in it. We use the complete variant to produce the kernel name
10425   // mangling.
10426   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10427   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10428     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10429       StringRef ParentName =
10430           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10431       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10432     }
10433     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10434       StringRef ParentName =
10435           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10436       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10437     }
10438   }
10439 
10440   // Do not to emit variable if it is not marked as declare target.
10441   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10442       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10443           cast<VarDecl>(GD.getDecl()));
10444   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10445       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10446        HasRequiresUnifiedSharedMemory)) {
10447     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10448     return true;
10449   }
10450   return false;
10451 }
10452 
10453 llvm::Constant *
10454 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10455                                                 const VarDecl *VD) {
10456   assert(VD->getType().isConstant(CGM.getContext()) &&
10457          "Expected constant variable.");
10458   StringRef VarName;
10459   llvm::Constant *Addr;
10460   llvm::GlobalValue::LinkageTypes Linkage;
10461   QualType Ty = VD->getType();
10462   SmallString<128> Buffer;
10463   {
10464     unsigned DeviceID;
10465     unsigned FileID;
10466     unsigned Line;
10467     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10468                              FileID, Line);
10469     llvm::raw_svector_ostream OS(Buffer);
10470     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10471        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10472     VarName = OS.str();
10473   }
10474   Linkage = llvm::GlobalValue::InternalLinkage;
10475   Addr =
10476       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10477                                   getDefaultFirstprivateAddressSpace());
10478   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10479   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10480   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10481   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10482       VarName, Addr, VarSize,
10483       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10484   return Addr;
10485 }
10486 
10487 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10488                                                    llvm::Constant *Addr) {
10489   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10490       !CGM.getLangOpts().OpenMPIsDevice)
10491     return;
10492   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10493       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10494   if (!Res) {
10495     if (CGM.getLangOpts().OpenMPIsDevice) {
10496       // Register non-target variables being emitted in device code (debug info
10497       // may cause this).
10498       StringRef VarName = CGM.getMangledName(VD);
10499       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10500     }
10501     return;
10502   }
10503   // Register declare target variables.
10504   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10505   StringRef VarName;
10506   CharUnits VarSize;
10507   llvm::GlobalValue::LinkageTypes Linkage;
10508 
10509   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10510       !HasRequiresUnifiedSharedMemory) {
10511     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10512     VarName = CGM.getMangledName(VD);
10513     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10514       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10515       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10516     } else {
10517       VarSize = CharUnits::Zero();
10518     }
10519     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10520     // Temp solution to prevent optimizations of the internal variables.
10521     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10522       std::string RefName = getName({VarName, "ref"});
10523       if (!CGM.GetGlobalValue(RefName)) {
10524         llvm::Constant *AddrRef =
10525             getOrCreateInternalVariable(Addr->getType(), RefName);
10526         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10527         GVAddrRef->setConstant(/*Val=*/true);
10528         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10529         GVAddrRef->setInitializer(Addr);
10530         CGM.addCompilerUsedGlobal(GVAddrRef);
10531       }
10532     }
10533   } else {
10534     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10535             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10536              HasRequiresUnifiedSharedMemory)) &&
10537            "Declare target attribute must link or to with unified memory.");
10538     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10539       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10540     else
10541       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10542 
10543     if (CGM.getLangOpts().OpenMPIsDevice) {
10544       VarName = Addr->getName();
10545       Addr = nullptr;
10546     } else {
10547       VarName = getAddrOfDeclareTargetVar(VD).getName();
10548       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10549     }
10550     VarSize = CGM.getPointerSize();
10551     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10552   }
10553 
10554   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10555       VarName, Addr, VarSize, Flags, Linkage);
10556 }
10557 
10558 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10559   if (isa<FunctionDecl>(GD.getDecl()) ||
10560       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10561     return emitTargetFunctions(GD);
10562 
10563   return emitTargetGlobalVariable(GD);
10564 }
10565 
10566 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10567   for (const VarDecl *VD : DeferredGlobalVariables) {
10568     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10569         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10570     if (!Res)
10571       continue;
10572     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10573         !HasRequiresUnifiedSharedMemory) {
10574       CGM.EmitGlobal(VD);
10575     } else {
10576       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10577               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10578                HasRequiresUnifiedSharedMemory)) &&
10579              "Expected link clause or to clause with unified memory.");
10580       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10581     }
10582   }
10583 }
10584 
/// Adjust target-specific data for lambdas used in the target directive \p D.
/// This implementation emits no code: it only checks, in builds with
/// assertions enabled, that \p D is a target execution directive. \p CGF is
/// not used here.
// NOTE(review): presumably a hook specialized by device-specific runtimes —
// confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10590 
10591 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10592   for (const OMPClause *Clause : D->clauselists()) {
10593     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10594       HasRequiresUnifiedSharedMemory = true;
10595     } else if (const auto *AC =
10596                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10597       switch (AC->getAtomicDefaultMemOrderKind()) {
10598       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10599         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10600         break;
10601       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10602         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10603         break;
10604       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10605         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10606         break;
10607       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10608         break;
10609       }
10610     }
10611   }
10612 }
10613 
/// Return the atomic ordering stored in RequiresAtomicOrdering, i.e. the
/// value last recorded by processRequiresDirective() for an
/// atomic_default_mem_order clause (or the member's initial value if no such
/// clause was processed).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10617 
10618 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10619                                                        LangAS &AS) {
10620   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10621     return false;
10622   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10623   switch(A->getAllocatorType()) {
10624   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10625   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10626   // Not supported, fallback to the default mem space.
10627   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10628   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10629   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10630   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10631   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10632   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10633   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10634     AS = LangAS::Default;
10635     return true;
10636   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10637     llvm_unreachable("Expected predefined allocator for the variables with the "
10638                      "static storage.");
10639   }
10640   return false;
10641 }
10642 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it sees a unified_shared_memory
/// clause on a 'requires' declaration.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10646 
10647 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10648     CodeGenModule &CGM)
10649     : CGM(CGM) {
10650   if (CGM.getLangOpts().OpenMPIsDevice) {
10651     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10652     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10653   }
10654 }
10655 
10656 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10657   if (CGM.getLangOpts().OpenMPIsDevice)
10658     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10659 }
10660 
10661 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10662   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10663     return true;
10664 
10665   const auto *D = cast<FunctionDecl>(GD.getDecl());
10666   // Do not to emit function if it is marked as declare target as it was already
10667   // emitted.
10668   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10669     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10670       if (auto *F = dyn_cast_or_null<llvm::Function>(
10671               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10672         return !F->isDeclaration();
10673       return false;
10674     }
10675     return true;
10676   }
10677 
10678   return !AlreadyEmittedTargetDecls.insert(D).second;
10679 }
10680 
10681 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10682   // If we don't have entries or if we are emitting code for the device, we
10683   // don't need to do anything.
10684   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10685       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10686       (OffloadEntriesInfoManager.empty() &&
10687        !HasEmittedDeclareTargetRegion &&
10688        !HasEmittedTargetRegion))
10689     return nullptr;
10690 
10691   // Create and register the function that handles the requires directives.
10692   ASTContext &C = CGM.getContext();
10693 
10694   llvm::Function *RequiresRegFn;
10695   {
10696     CodeGenFunction CGF(CGM);
10697     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10698     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10699     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10700     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10701     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10702     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10703     // TODO: check for other requires clauses.
10704     // The requires directive takes effect only when a target region is
10705     // present in the compilation unit. Otherwise it is ignored and not
10706     // passed to the runtime. This avoids the runtime from throwing an error
10707     // for mismatching requires clauses across compilation units that don't
10708     // contain at least 1 target region.
10709     assert((HasEmittedTargetRegion ||
10710             HasEmittedDeclareTargetRegion ||
10711             !OffloadEntriesInfoManager.empty()) &&
10712            "Target or declare target region expected.");
10713     if (HasRequiresUnifiedSharedMemory)
10714       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10715     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10716                             CGM.getModule(), OMPRTL___tgt_register_requires),
10717                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10718     CGF.FinishFunction();
10719   }
10720   return RequiresRegFn;
10721 }
10722 
10723 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10724                                     const OMPExecutableDirective &D,
10725                                     SourceLocation Loc,
10726                                     llvm::Function *OutlinedFn,
10727                                     ArrayRef<llvm::Value *> CapturedVars) {
10728   if (!CGF.HaveInsertPoint())
10729     return;
10730 
10731   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10732   CodeGenFunction::RunCleanupsScope Scope(CGF);
10733 
10734   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10735   llvm::Value *Args[] = {
10736       RTLoc,
10737       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10738       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10739   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10740   RealArgs.append(std::begin(Args), std::end(Args));
10741   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10742 
10743   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10744       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10745   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10746 }
10747 
10748 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10749                                          const Expr *NumTeams,
10750                                          const Expr *ThreadLimit,
10751                                          SourceLocation Loc) {
10752   if (!CGF.HaveInsertPoint())
10753     return;
10754 
10755   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10756 
10757   llvm::Value *NumTeamsVal =
10758       NumTeams
10759           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10760                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10761           : CGF.Builder.getInt32(0);
10762 
10763   llvm::Value *ThreadLimitVal =
10764       ThreadLimit
10765           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10766                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10767           : CGF.Builder.getInt32(0);
10768 
10769   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10770   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10771                                      ThreadLimitVal};
10772   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10773                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10774                       PushNumTeamsArgs);
10775 }
10776 
/// Emit the runtime calls that open and close the data environment of the
/// directive \p D, together with the region body produced by \p CodeGen.
///
/// The environment is opened with __tgt_target_data_begin_mapper and closed
/// with __tgt_target_data_end_mapper. When \p IfCond is given, both calls are
/// emitted through emitIfClause() so they are guarded by the condition.
///
/// The placement of the body depends on \p Info.CaptureDeviceAddrMap:
///  - empty: the body is emitted once, between the two runtime calls, with
///    the no-op NoPrivAction installed;
///  - non-empty: the body is emitted right after the begin call and, when
///    \p IfCond is present, a second time in the 'else' branch with
///    NoPrivAction installed.
///
/// \param Device Optional device expression; when null, OMP_DEVICEID_UNDEF is
///        passed to the runtime. Note that the expression is evaluated once
///        for the begin call and once more for the end call.
/// \param Info Filled in by emitOffloadingArrays() while opening the
///        environment and read again when closing it.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  // Nothing can be emitted without an insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Same argument setup as for the begin call, but flagged as the end call.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an 'if' clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment under the same condition as the opening.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10930 
/// Emit the runtime call for a standalone data directive \p D, which must be
/// 'target enter data', 'target exit data' or 'target update'.
///
/// The offloading arrays are built first (TargetThenGen); the runtime call
/// itself (ThenGen) is then emitted either inside a task, when \p D has a
/// depend or nowait clause, or inline otherwise. The '_nowait' variant of the
/// runtime entry point is selected when \p D has a nowait clause.
///
/// \param IfCond Optional 'if' clause condition; when present the whole
///        sequence is guarded by it and nothing is emitted on the else path.
/// \param Device Optional device expression; when null, OMP_DEVICEID_UNDEF is
///        passed to the runtime.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing can be emitted without an insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills these in
  // and ThenGen reads them when building the runtime call.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is rejected; the assert at the top of the
    // function already restricts D to the three kinds handled above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays, publish them through InputInfo /
  // MapTypesArray / MapNamesArray, and then run ThenGen either inside a task
  // or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A depend or nowait clause means the runtime call has to be wrapped in
    // a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the whole sequence is conditional and the else branch
  // emits nothing; otherwise it is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11108 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  // emitX86DeclareSimdFunction() mangles these kinds as 's'
  // (LinearWithVarStride), 'l' (Linear), 'u' (Uniform) and 'v' (Vector).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    // Value printed after the 's' / 'l' letter in the x86 mangled name; for
    // Linear it is omitted when equal to 1.
    // NOTE(review): presumably the constant stride for Linear and the
    // position of the stride argument for LinearWithVarStride — confirm
    // against the code that fills this in.
    llvm::APSInt StrideOrArg;
    // Alignment printed as 'a'<value> in the x86 mangled name; a zero value
    // is not printed.
    llvm::APSInt Alignment;
  };
} // namespace
11119 
11120 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11121                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11122   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11123   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11124   // of that clause. The VLEN value must be power of 2.
11125   // In other case the notion of the function`s "characteristic data type" (CDT)
11126   // is used to compute the vector length.
11127   // CDT is defined in the following order:
11128   //   a) For non-void function, the CDT is the return type.
11129   //   b) If the function has any non-uniform, non-linear parameters, then the
11130   //   CDT is the type of the first such parameter.
11131   //   c) If the CDT determined by a) or b) above is struct, union, or class
11132   //   type which is pass-by-value (except for the type that maps to the
11133   //   built-in complex data type), the characteristic data type is int.
11134   //   d) If none of the above three cases is applicable, the CDT is int.
11135   // The VLEN is then determined based on the CDT and the size of vector
11136   // register of that ISA for which current vector version is generated. The
11137   // VLEN is computed using the formula below:
11138   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11139   // where vector register size specified in section 3.2.1 Registers and the
11140   // Stack Frame of original AMD64 ABI document.
11141   QualType RetType = FD->getReturnType();
11142   if (RetType.isNull())
11143     return 0;
11144   ASTContext &C = FD->getASTContext();
11145   QualType CDT;
11146   if (!RetType.isNull() && !RetType->isVoidType()) {
11147     CDT = RetType;
11148   } else {
11149     unsigned Offset = 0;
11150     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11151       if (ParamAttrs[Offset].Kind == Vector)
11152         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11153       ++Offset;
11154     }
11155     if (CDT.isNull()) {
11156       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11157         if (ParamAttrs[I + Offset].Kind == Vector) {
11158           CDT = FD->getParamDecl(I)->getType();
11159           break;
11160         }
11161       }
11162     }
11163   }
11164   if (CDT.isNull())
11165     CDT = C.IntTy;
11166   CDT = CDT->getCanonicalTypeUnqualified();
11167   if (CDT->isRecordType() || CDT->isUnionType())
11168     CDT = C.IntTy;
11169   return C.getTypeSize(CDT);
11170 }
11171 
11172 static void
11173 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11174                            const llvm::APSInt &VLENVal,
11175                            ArrayRef<ParamAttrTy> ParamAttrs,
11176                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11177   struct ISADataTy {
11178     char ISA;
11179     unsigned VecRegSize;
11180   };
11181   ISADataTy ISAData[] = {
11182       {
11183           'b', 128
11184       }, // SSE
11185       {
11186           'c', 256
11187       }, // AVX
11188       {
11189           'd', 256
11190       }, // AVX2
11191       {
11192           'e', 512
11193       }, // AVX512
11194   };
11195   llvm::SmallVector<char, 2> Masked;
11196   switch (State) {
11197   case OMPDeclareSimdDeclAttr::BS_Undefined:
11198     Masked.push_back('N');
11199     Masked.push_back('M');
11200     break;
11201   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11202     Masked.push_back('N');
11203     break;
11204   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11205     Masked.push_back('M');
11206     break;
11207   }
11208   for (char Mask : Masked) {
11209     for (const ISADataTy &Data : ISAData) {
11210       SmallString<256> Buffer;
11211       llvm::raw_svector_ostream Out(Buffer);
11212       Out << "_ZGV" << Data.ISA << Mask;
11213       if (!VLENVal) {
11214         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11215         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11216         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11217       } else {
11218         Out << VLENVal;
11219       }
11220       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11221         switch (ParamAttr.Kind){
11222         case LinearWithVarStride:
11223           Out << 's' << ParamAttr.StrideOrArg;
11224           break;
11225         case Linear:
11226           Out << 'l';
11227           if (ParamAttr.StrideOrArg != 1)
11228             Out << ParamAttr.StrideOrArg;
11229           break;
11230         case Uniform:
11231           Out << 'u';
11232           break;
11233         case Vector:
11234           Out << 'v';
11235           break;
11236         }
11237         if (!!ParamAttr.Alignment)
11238           Out << 'a' << ParamAttr.Alignment;
11239       }
11240       Out << '_' << Fn->getName();
11241       Fn->addFnAttr(Out.str());
11242     }
11243   }
11244 }
11245 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11251 
11252 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11253 ///
11254 /// TODO: Need to implement the behavior for reference marked with a
11255 /// var or no linear modifiers (1.b in the section). For this, we
11256 /// need to extend ParamKindTy to support the linear modifiers.
11257 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11258   QT = QT.getCanonicalType();
11259 
11260   if (QT->isVoidType())
11261     return false;
11262 
11263   if (Kind == ParamKindTy::Uniform)
11264     return false;
11265 
11266   if (Kind == ParamKindTy::Linear)
11267     return false;
11268 
11269   // TODO: Handle linear references with modifiers
11270 
11271   if (Kind == ParamKindTy::LinearWithVarStride)
11272     return false;
11273 
11274   return true;
11275 }
11276 
11277 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11278 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11279   QT = QT.getCanonicalType();
11280   unsigned Size = C.getTypeSize(QT);
11281 
11282   // Only scalars and complex within 16 bytes wide set PVB to true.
11283   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11284     return false;
11285 
11286   if (QT->isFloatingType())
11287     return true;
11288 
11289   if (QT->isIntegerType())
11290     return true;
11291 
11292   if (QT->isPointerType())
11293     return true;
11294 
11295   // TODO: Add support for complex types (section 3.1.2, item 2).
11296 
11297   return false;
11298 }
11299 
11300 /// Computes the lane size (LS) of a return type or of an input parameter,
11301 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11302 /// TODO: Add support for references, section 3.2.1, item 1.
11303 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11304   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11305     QualType PTy = QT.getCanonicalType()->getPointeeType();
11306     if (getAArch64PBV(PTy, C))
11307       return C.getTypeSize(PTy);
11308   }
11309   if (getAArch64PBV(QT, C))
11310     return C.getTypeSize(QT);
11311 
11312   return C.getTypeSize(C.getUIntPtrType());
11313 }
11314 
11315 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11316 // signature of the scalar function, as defined in 3.2.2 of the
11317 // AAVFABI.
11318 static std::tuple<unsigned, unsigned, bool>
11319 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11320   QualType RetType = FD->getReturnType().getCanonicalType();
11321 
11322   ASTContext &C = FD->getASTContext();
11323 
11324   bool OutputBecomesInput = false;
11325 
11326   llvm::SmallVector<unsigned, 8> Sizes;
11327   if (!RetType->isVoidType()) {
11328     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11329     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11330       OutputBecomesInput = true;
11331   }
11332   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11333     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11334     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11335   }
11336 
11337   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11338   // The LS of a function parameter / return value can only be a power
11339   // of 2, starting from 8 bits, up to 128.
11340   assert(std::all_of(Sizes.begin(), Sizes.end(),
11341                      [](unsigned Size) {
11342                        return Size == 8 || Size == 16 || Size == 32 ||
11343                               Size == 64 || Size == 128;
11344                      }) &&
11345          "Invalid size");
11346 
11347   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11348                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11349                          OutputBecomesInput);
11350 }
11351 
11352 /// Mangle the parameter part of the vector function name according to
11353 /// their OpenMP classification. The mangling function is defined in
11354 /// section 3.5 of the AAVFABI.
11355 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11356   SmallString<256> Buffer;
11357   llvm::raw_svector_ostream Out(Buffer);
11358   for (const auto &ParamAttr : ParamAttrs) {
11359     switch (ParamAttr.Kind) {
11360     case LinearWithVarStride:
11361       Out << "ls" << ParamAttr.StrideOrArg;
11362       break;
11363     case Linear:
11364       Out << 'l';
11365       // Don't print the step value if it is not present or if it is
11366       // equal to 1.
11367       if (ParamAttr.StrideOrArg != 1)
11368         Out << ParamAttr.StrideOrArg;
11369       break;
11370     case Uniform:
11371       Out << 'u';
11372       break;
11373     case Vector:
11374       Out << 'v';
11375       break;
11376     }
11377 
11378     if (!!ParamAttr.Alignment)
11379       Out << 'a' << ParamAttr.Alignment;
11380   }
11381 
11382   return std::string(Out.str());
11383 }
11384 
11385 // Function used to add the attribute. The parameter `VLEN` is
11386 // templated to allow the use of "x" when targeting scalable functions
11387 // for SVE.
11388 template <typename T>
11389 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11390                                  char ISA, StringRef ParSeq,
11391                                  StringRef MangledName, bool OutputBecomesInput,
11392                                  llvm::Function *Fn) {
11393   SmallString<256> Buffer;
11394   llvm::raw_svector_ostream Out(Buffer);
11395   Out << Prefix << ISA << LMask << VLEN;
11396   if (OutputBecomesInput)
11397     Out << "v";
11398   Out << ParSeq << "_" << MangledName;
11399   Fn->addFnAttr(Out.str());
11400 }
11401 
11402 // Helper function to generate the Advanced SIMD names depending on
11403 // the value of the NDS when simdlen is not present.
11404 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11405                                       StringRef Prefix, char ISA,
11406                                       StringRef ParSeq, StringRef MangledName,
11407                                       bool OutputBecomesInput,
11408                                       llvm::Function *Fn) {
11409   switch (NDS) {
11410   case 8:
11411     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11412                          OutputBecomesInput, Fn);
11413     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11414                          OutputBecomesInput, Fn);
11415     break;
11416   case 16:
11417     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11418                          OutputBecomesInput, Fn);
11419     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11420                          OutputBecomesInput, Fn);
11421     break;
11422   case 32:
11423     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11424                          OutputBecomesInput, Fn);
11425     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11426                          OutputBecomesInput, Fn);
11427     break;
11428   case 64:
11429   case 128:
11430     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11431                          OutputBecomesInput, Fn);
11432     break;
11433   default:
11434     llvm_unreachable("Scalar type is too wide.");
11435   }
11436 }
11437 
11438 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11439 static void emitAArch64DeclareSimdFunction(
11440     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11441     ArrayRef<ParamAttrTy> ParamAttrs,
11442     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11443     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11444 
11445   // Get basic data for building the vector signature.
11446   const auto Data = getNDSWDS(FD, ParamAttrs);
11447   const unsigned NDS = std::get<0>(Data);
11448   const unsigned WDS = std::get<1>(Data);
11449   const bool OutputBecomesInput = std::get<2>(Data);
11450 
11451   // Check the values provided via `simdlen` by the user.
11452   // 1. A `simdlen(1)` doesn't produce vector signatures,
11453   if (UserVLEN == 1) {
11454     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11455         DiagnosticsEngine::Warning,
11456         "The clause simdlen(1) has no effect when targeting aarch64.");
11457     CGM.getDiags().Report(SLoc, DiagID);
11458     return;
11459   }
11460 
11461   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11462   // Advanced SIMD output.
11463   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11464     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11465         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11466                                     "power of 2 when targeting Advanced SIMD.");
11467     CGM.getDiags().Report(SLoc, DiagID);
11468     return;
11469   }
11470 
11471   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11472   // limits.
11473   if (ISA == 's' && UserVLEN != 0) {
11474     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11475       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11476           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11477                                       "lanes in the architectural constraints "
11478                                       "for SVE (min is 128-bit, max is "
11479                                       "2048-bit, by steps of 128-bit)");
11480       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11481       return;
11482     }
11483   }
11484 
11485   // Sort out parameter sequence.
11486   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11487   StringRef Prefix = "_ZGV";
11488   // Generate simdlen from user input (if any).
11489   if (UserVLEN) {
11490     if (ISA == 's') {
11491       // SVE generates only a masked function.
11492       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11493                            OutputBecomesInput, Fn);
11494     } else {
11495       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11496       // Advanced SIMD generates one or two functions, depending on
11497       // the `[not]inbranch` clause.
11498       switch (State) {
11499       case OMPDeclareSimdDeclAttr::BS_Undefined:
11500         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11501                              OutputBecomesInput, Fn);
11502         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11503                              OutputBecomesInput, Fn);
11504         break;
11505       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11506         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11507                              OutputBecomesInput, Fn);
11508         break;
11509       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11510         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11511                              OutputBecomesInput, Fn);
11512         break;
11513       }
11514     }
11515   } else {
11516     // If no user simdlen is provided, follow the AAVFABI rules for
11517     // generating the vector length.
11518     if (ISA == 's') {
11519       // SVE, section 3.4.1, item 1.
11520       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11521                            OutputBecomesInput, Fn);
11522     } else {
11523       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11524       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11525       // two vector names depending on the use of the clause
11526       // `[not]inbranch`.
11527       switch (State) {
11528       case OMPDeclareSimdDeclAttr::BS_Undefined:
11529         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11530                                   OutputBecomesInput, Fn);
11531         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11532                                   OutputBecomesInput, Fn);
11533         break;
11534       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11535         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11536                                   OutputBecomesInput, Fn);
11537         break;
11538       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11539         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11540                                   OutputBecomesInput, Fn);
11541         break;
11542       }
11543     }
11544   }
11545 }
11546 
11547 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11548                                               llvm::Function *Fn) {
11549   ASTContext &C = CGM.getContext();
11550   FD = FD->getMostRecentDecl();
11551   // Map params to their positions in function decl.
11552   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11553   if (isa<CXXMethodDecl>(FD))
11554     ParamPositions.try_emplace(FD, 0);
11555   unsigned ParamPos = ParamPositions.size();
11556   for (const ParmVarDecl *P : FD->parameters()) {
11557     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11558     ++ParamPos;
11559   }
11560   while (FD) {
11561     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11562       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11563       // Mark uniform parameters.
11564       for (const Expr *E : Attr->uniforms()) {
11565         E = E->IgnoreParenImpCasts();
11566         unsigned Pos;
11567         if (isa<CXXThisExpr>(E)) {
11568           Pos = ParamPositions[FD];
11569         } else {
11570           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11571                                 ->getCanonicalDecl();
11572           Pos = ParamPositions[PVD];
11573         }
11574         ParamAttrs[Pos].Kind = Uniform;
11575       }
11576       // Get alignment info.
11577       auto NI = Attr->alignments_begin();
11578       for (const Expr *E : Attr->aligneds()) {
11579         E = E->IgnoreParenImpCasts();
11580         unsigned Pos;
11581         QualType ParmTy;
11582         if (isa<CXXThisExpr>(E)) {
11583           Pos = ParamPositions[FD];
11584           ParmTy = E->getType();
11585         } else {
11586           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11587                                 ->getCanonicalDecl();
11588           Pos = ParamPositions[PVD];
11589           ParmTy = PVD->getType();
11590         }
11591         ParamAttrs[Pos].Alignment =
11592             (*NI)
11593                 ? (*NI)->EvaluateKnownConstInt(C)
11594                 : llvm::APSInt::getUnsigned(
11595                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11596                           .getQuantity());
11597         ++NI;
11598       }
11599       // Mark linear parameters.
11600       auto SI = Attr->steps_begin();
11601       auto MI = Attr->modifiers_begin();
11602       for (const Expr *E : Attr->linears()) {
11603         E = E->IgnoreParenImpCasts();
11604         unsigned Pos;
11605         // Rescaling factor needed to compute the linear parameter
11606         // value in the mangled name.
11607         unsigned PtrRescalingFactor = 1;
11608         if (isa<CXXThisExpr>(E)) {
11609           Pos = ParamPositions[FD];
11610         } else {
11611           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11612                                 ->getCanonicalDecl();
11613           Pos = ParamPositions[PVD];
11614           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11615             PtrRescalingFactor = CGM.getContext()
11616                                      .getTypeSizeInChars(P->getPointeeType())
11617                                      .getQuantity();
11618         }
11619         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11620         ParamAttr.Kind = Linear;
11621         // Assuming a stride of 1, for `linear` without modifiers.
11622         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11623         if (*SI) {
11624           Expr::EvalResult Result;
11625           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11626             if (const auto *DRE =
11627                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11628               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11629                 ParamAttr.Kind = LinearWithVarStride;
11630                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11631                     ParamPositions[StridePVD->getCanonicalDecl()]);
11632               }
11633             }
11634           } else {
11635             ParamAttr.StrideOrArg = Result.Val.getInt();
11636           }
11637         }
11638         // If we are using a linear clause on a pointer, we need to
11639         // rescale the value of linear_step with the byte size of the
11640         // pointee type.
11641         if (Linear == ParamAttr.Kind)
11642           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11643         ++SI;
11644         ++MI;
11645       }
11646       llvm::APSInt VLENVal;
11647       SourceLocation ExprLoc;
11648       const Expr *VLENExpr = Attr->getSimdlen();
11649       if (VLENExpr) {
11650         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11651         ExprLoc = VLENExpr->getExprLoc();
11652       }
11653       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11654       if (CGM.getTriple().isX86()) {
11655         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11656       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11657         unsigned VLEN = VLENVal.getExtValue();
11658         StringRef MangledName = Fn->getName();
11659         if (CGM.getTarget().hasFeature("sve"))
11660           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11661                                          MangledName, 's', 128, Fn, ExprLoc);
11662         if (CGM.getTarget().hasFeature("neon"))
11663           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11664                                          MangledName, 'n', 128, Fn, ExprLoc);
11665       }
11666     }
11667     FD = FD->getPreviousDecl();
11668   }
11669 }
11670 
11671 namespace {
11672 /// Cleanup action for doacross support.
11673 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11674 public:
11675   static const int DoacrossFinArgs = 2;
11676 
11677 private:
11678   llvm::FunctionCallee RTLFn;
11679   llvm::Value *Args[DoacrossFinArgs];
11680 
11681 public:
11682   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11683                     ArrayRef<llvm::Value *> CallArgs)
11684       : RTLFn(RTLFn) {
11685     assert(CallArgs.size() == DoacrossFinArgs);
11686     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11687   }
11688   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11689     if (!CGF.HaveInsertPoint())
11690       return;
11691     CGF.EmitRuntimeCall(RTLFn, Args);
11692   }
11693 };
11694 } // namespace
11695 
11696 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11697                                        const OMPLoopDirective &D,
11698                                        ArrayRef<Expr *> NumIterations) {
11699   if (!CGF.HaveInsertPoint())
11700     return;
11701 
11702   ASTContext &C = CGM.getContext();
11703   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11704   RecordDecl *RD;
11705   if (KmpDimTy.isNull()) {
11706     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11707     //  kmp_int64 lo; // lower
11708     //  kmp_int64 up; // upper
11709     //  kmp_int64 st; // stride
11710     // };
11711     RD = C.buildImplicitRecord("kmp_dim");
11712     RD->startDefinition();
11713     addFieldToRecordDecl(C, RD, Int64Ty);
11714     addFieldToRecordDecl(C, RD, Int64Ty);
11715     addFieldToRecordDecl(C, RD, Int64Ty);
11716     RD->completeDefinition();
11717     KmpDimTy = C.getRecordType(RD);
11718   } else {
11719     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11720   }
11721   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11722   QualType ArrayTy =
11723       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11724 
11725   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11726   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11727   enum { LowerFD = 0, UpperFD, StrideFD };
11728   // Fill dims with data.
11729   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11730     LValue DimsLVal = CGF.MakeAddrLValue(
11731         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11732     // dims.upper = num_iterations;
11733     LValue UpperLVal = CGF.EmitLValueForField(
11734         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11735     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11736         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11737         Int64Ty, NumIterations[I]->getExprLoc());
11738     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11739     // dims.stride = 1;
11740     LValue StrideLVal = CGF.EmitLValueForField(
11741         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11742     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11743                           StrideLVal);
11744   }
11745 
11746   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11747   // kmp_int32 num_dims, struct kmp_dim * dims);
11748   llvm::Value *Args[] = {
11749       emitUpdateLocation(CGF, D.getBeginLoc()),
11750       getThreadID(CGF, D.getBeginLoc()),
11751       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11752       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11753           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11754           CGM.VoidPtrTy)};
11755 
11756   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11757       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11758   CGF.EmitRuntimeCall(RTLFn, Args);
11759   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11760       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11761   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11762       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11763   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11764                                              llvm::makeArrayRef(FiniArgs));
11765 }
11766 
11767 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11768                                           const OMPDependClause *C) {
11769   QualType Int64Ty =
11770       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11771   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11772   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11773       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11774   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11775   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11776     const Expr *CounterVal = C->getLoopData(I);
11777     assert(CounterVal);
11778     llvm::Value *CntVal = CGF.EmitScalarConversion(
11779         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11780         CounterVal->getExprLoc());
11781     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11782                           /*Volatile=*/false, Int64Ty);
11783   }
11784   llvm::Value *Args[] = {
11785       emitUpdateLocation(CGF, C->getBeginLoc()),
11786       getThreadID(CGF, C->getBeginLoc()),
11787       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11788   llvm::FunctionCallee RTLFn;
11789   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11790     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11791                                                   OMPRTL___kmpc_doacross_post);
11792   } else {
11793     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11794     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11795                                                   OMPRTL___kmpc_doacross_wait);
11796   }
11797   CGF.EmitRuntimeCall(RTLFn, Args);
11798 }
11799 
11800 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11801                                llvm::FunctionCallee Callee,
11802                                ArrayRef<llvm::Value *> Args) const {
11803   assert(Loc.isValid() && "Outlined function call location must be valid.");
11804   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11805 
11806   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11807     if (Fn->doesNotThrow()) {
11808       CGF.EmitNounwindRuntimeCall(Fn, Args);
11809       return;
11810     }
11811   }
11812   CGF.EmitRuntimeCall(Callee, Args);
11813 }
11814 
11815 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11816     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11817     ArrayRef<llvm::Value *> Args) const {
11818   emitCall(CGF, Loc, OutlinedFn, Args);
11819 }
11820 
11821 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11822   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11823     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11824       HasEmittedDeclareTargetRegion = true;
11825 }
11826 
11827 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11828                                              const VarDecl *NativeParam,
11829                                              const VarDecl *TargetParam) const {
11830   return CGF.GetAddrOfLocalVar(NativeParam);
11831 }
11832 
11833 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11834                                                    const VarDecl *VD) {
11835   if (!VD)
11836     return Address::invalid();
11837   Address UntiedAddr = Address::invalid();
11838   Address UntiedRealAddr = Address::invalid();
11839   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11840   if (It != FunctionToUntiedTaskStackMap.end()) {
11841     const UntiedLocalVarsAddressesMap &UntiedData =
11842         UntiedLocalVarsStack[It->second];
11843     auto I = UntiedData.find(VD);
11844     if (I != UntiedData.end()) {
11845       UntiedAddr = I->second.first;
11846       UntiedRealAddr = I->second.second;
11847     }
11848   }
11849   const VarDecl *CVD = VD->getCanonicalDecl();
11850   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11851     // Use the default allocation.
11852     if (!isAllocatableDecl(VD))
11853       return UntiedAddr;
11854     llvm::Value *Size;
11855     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11856     if (CVD->getType()->isVariablyModifiedType()) {
11857       Size = CGF.getTypeSize(CVD->getType());
11858       // Align the size: ((size + align - 1) / align) * align
11859       Size = CGF.Builder.CreateNUWAdd(
11860           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11861       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11862       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11863     } else {
11864       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11865       Size = CGM.getSize(Sz.alignTo(Align));
11866     }
11867     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11868     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11869     assert(AA->getAllocator() &&
11870            "Expected allocator expression for non-default allocator.");
11871     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11872     // According to the standard, the original allocator type is a enum
11873     // (integer). Convert to pointer type, if required.
11874     Allocator = CGF.EmitScalarConversion(
11875         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
11876         AA->getAllocator()->getExprLoc());
11877     llvm::Value *Args[] = {ThreadID, Size, Allocator};
11878 
11879     llvm::Value *Addr =
11880         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11881                                 CGM.getModule(), OMPRTL___kmpc_alloc),
11882                             Args, getName({CVD->getName(), ".void.addr"}));
11883     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11884         CGM.getModule(), OMPRTL___kmpc_free);
11885     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11886     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11887         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11888     if (UntiedAddr.isValid())
11889       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11890 
11891     // Cleanup action for allocate support.
11892     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11893       llvm::FunctionCallee RTLFn;
11894       unsigned LocEncoding;
11895       Address Addr;
11896       const Expr *Allocator;
11897 
11898     public:
11899       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
11900                            Address Addr, const Expr *Allocator)
11901           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11902             Allocator(Allocator) {}
11903       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11904         if (!CGF.HaveInsertPoint())
11905           return;
11906         llvm::Value *Args[3];
11907         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11908             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11909         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11910             Addr.getPointer(), CGF.VoidPtrTy);
11911         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
11912         // According to the standard, the original allocator type is a enum
11913         // (integer). Convert to pointer type, if required.
11914         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11915                                             CGF.getContext().VoidPtrTy,
11916                                             Allocator->getExprLoc());
11917         Args[2] = AllocVal;
11918 
11919         CGF.EmitRuntimeCall(RTLFn, Args);
11920       }
11921     };
11922     Address VDAddr =
11923         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
11924     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11925         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11926         VDAddr, AA->getAllocator());
11927     if (UntiedRealAddr.isValid())
11928       if (auto *Region =
11929               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11930         Region->emitUntiedSwitch(CGF);
11931     return VDAddr;
11932   }
11933   return UntiedAddr;
11934 }
11935 
11936 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11937                                              const VarDecl *VD) const {
11938   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11939   if (It == FunctionToUntiedTaskStackMap.end())
11940     return false;
11941   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11942 }
11943 
11944 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11945     CodeGenModule &CGM, const OMPLoopDirective &S)
11946     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11947   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11948   if (!NeedToPush)
11949     return;
11950   NontemporalDeclsSet &DS =
11951       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11952   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11953     for (const Stmt *Ref : C->private_refs()) {
11954       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11955       const ValueDecl *VD;
11956       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11957         VD = DRE->getDecl();
11958       } else {
11959         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11960         assert((ME->isImplicitCXXThis() ||
11961                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11962                "Expected member of current class.");
11963         VD = ME->getMemberDecl();
11964       }
11965       DS.insert(VD);
11966     }
11967   }
11968 }
11969 
11970 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11971   if (!NeedToPush)
11972     return;
11973   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11974 }
11975 
11976 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11977     CodeGenFunction &CGF,
11978     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11979                          std::pair<Address, Address>> &LocalVars)
11980     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11981   if (!NeedToPush)
11982     return;
11983   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11984       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11985   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11986 }
11987 
11988 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11989   if (!NeedToPush)
11990     return;
11991   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11992 }
11993 
11994 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11995   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11996 
11997   return llvm::any_of(
11998       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11999       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12000 }
12001 
12002 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12003     const OMPExecutableDirective &S,
12004     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12005     const {
12006   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12007   // Vars in target/task regions must be excluded completely.
12008   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12009       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12010     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12011     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12012     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12013     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12014       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12015         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12016     }
12017   }
12018   // Exclude vars in private clauses.
12019   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12020     for (const Expr *Ref : C->varlists()) {
12021       if (!Ref->getType()->isScalarType())
12022         continue;
12023       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12024       if (!DRE)
12025         continue;
12026       NeedToCheckForLPCs.insert(DRE->getDecl());
12027     }
12028   }
12029   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12030     for (const Expr *Ref : C->varlists()) {
12031       if (!Ref->getType()->isScalarType())
12032         continue;
12033       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12034       if (!DRE)
12035         continue;
12036       NeedToCheckForLPCs.insert(DRE->getDecl());
12037     }
12038   }
12039   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12040     for (const Expr *Ref : C->varlists()) {
12041       if (!Ref->getType()->isScalarType())
12042         continue;
12043       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12044       if (!DRE)
12045         continue;
12046       NeedToCheckForLPCs.insert(DRE->getDecl());
12047     }
12048   }
12049   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12050     for (const Expr *Ref : C->varlists()) {
12051       if (!Ref->getType()->isScalarType())
12052         continue;
12053       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12054       if (!DRE)
12055         continue;
12056       NeedToCheckForLPCs.insert(DRE->getDecl());
12057     }
12058   }
12059   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12060     for (const Expr *Ref : C->varlists()) {
12061       if (!Ref->getType()->isScalarType())
12062         continue;
12063       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12064       if (!DRE)
12065         continue;
12066       NeedToCheckForLPCs.insert(DRE->getDecl());
12067     }
12068   }
12069   for (const Decl *VD : NeedToCheckForLPCs) {
12070     for (const LastprivateConditionalData &Data :
12071          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12072       if (Data.DeclToUniqueName.count(VD) > 0) {
12073         if (!Data.Disabled)
12074           NeedToAddForLPCsAsDisabled.insert(VD);
12075         break;
12076       }
12077     }
12078   }
12079 }
12080 
12081 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12082     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12083     : CGM(CGF.CGM),
12084       Action((CGM.getLangOpts().OpenMP >= 50 &&
12085               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12086                            [](const OMPLastprivateClause *C) {
12087                              return C->getKind() ==
12088                                     OMPC_LASTPRIVATE_conditional;
12089                            }))
12090                  ? ActionToDo::PushAsLastprivateConditional
12091                  : ActionToDo::DoNotPush) {
12092   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12093   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12094     return;
12095   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12096          "Expected a push action.");
12097   LastprivateConditionalData &Data =
12098       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12099   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12100     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12101       continue;
12102 
12103     for (const Expr *Ref : C->varlists()) {
12104       Data.DeclToUniqueName.insert(std::make_pair(
12105           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12106           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12107     }
12108   }
12109   Data.IVLVal = IVLVal;
12110   Data.Fn = CGF.CurFn;
12111 }
12112 
12113 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12114     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12115     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12116   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12117   if (CGM.getLangOpts().OpenMP < 50)
12118     return;
12119   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12120   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12121   if (!NeedToAddForLPCsAsDisabled.empty()) {
12122     Action = ActionToDo::DisableLastprivateConditional;
12123     LastprivateConditionalData &Data =
12124         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12125     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12126       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12127     Data.Fn = CGF.CurFn;
12128     Data.Disabled = true;
12129   }
12130 }
12131 
/// Factory for the two-argument (CGF, S) form of LastprivateConditionalRAII:
/// constructs the object from \p CGF and \p S and returns it by value.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12137 
12138 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12139   if (CGM.getLangOpts().OpenMP < 50)
12140     return;
12141   if (Action == ActionToDo::DisableLastprivateConditional) {
12142     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12143            "Expected list of disabled private vars.");
12144     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12145   }
12146   if (Action == ActionToDo::PushAsLastprivateConditional) {
12147     assert(
12148         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12149         "Expected list of lastprivate conditional vars.");
12150     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12151   }
12152 }
12153 
12154 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12155                                                         const VarDecl *VD) {
12156   ASTContext &C = CGM.getContext();
12157   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12158   if (I == LastprivateConditionalToTypes.end())
12159     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12160   QualType NewType;
12161   const FieldDecl *VDField;
12162   const FieldDecl *FiredField;
12163   LValue BaseLVal;
12164   auto VI = I->getSecond().find(VD);
12165   if (VI == I->getSecond().end()) {
12166     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12167     RD->startDefinition();
12168     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12169     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12170     RD->completeDefinition();
12171     NewType = C.getRecordType(RD);
12172     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12173     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12174     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12175   } else {
12176     NewType = std::get<0>(VI->getSecond());
12177     VDField = std::get<1>(VI->getSecond());
12178     FiredField = std::get<2>(VI->getSecond());
12179     BaseLVal = std::get<3>(VI->getSecond());
12180   }
12181   LValue FiredLVal =
12182       CGF.EmitLValueForField(BaseLVal, FiredField);
12183   CGF.EmitStoreOfScalar(
12184       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12185       FiredLVal);
12186   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12187 }
12188 
12189 namespace {
12190 /// Checks if the lastprivate conditional variable is referenced in LHS.
12191 class LastprivateConditionalRefChecker final
12192     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12193   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12194   const Expr *FoundE = nullptr;
12195   const Decl *FoundD = nullptr;
12196   StringRef UniqueDeclName;
12197   LValue IVLVal;
12198   llvm::Function *FoundFn = nullptr;
12199   SourceLocation Loc;
12200 
12201 public:
12202   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12203     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12204          llvm::reverse(LPM)) {
12205       auto It = D.DeclToUniqueName.find(E->getDecl());
12206       if (It == D.DeclToUniqueName.end())
12207         continue;
12208       if (D.Disabled)
12209         return false;
12210       FoundE = E;
12211       FoundD = E->getDecl()->getCanonicalDecl();
12212       UniqueDeclName = It->second;
12213       IVLVal = D.IVLVal;
12214       FoundFn = D.Fn;
12215       break;
12216     }
12217     return FoundE == E;
12218   }
12219   bool VisitMemberExpr(const MemberExpr *E) {
12220     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12221       return false;
12222     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12223          llvm::reverse(LPM)) {
12224       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12225       if (It == D.DeclToUniqueName.end())
12226         continue;
12227       if (D.Disabled)
12228         return false;
12229       FoundE = E;
12230       FoundD = E->getMemberDecl()->getCanonicalDecl();
12231       UniqueDeclName = It->second;
12232       IVLVal = D.IVLVal;
12233       FoundFn = D.Fn;
12234       break;
12235     }
12236     return FoundE == E;
12237   }
12238   bool VisitStmt(const Stmt *S) {
12239     for (const Stmt *Child : S->children()) {
12240       if (!Child)
12241         continue;
12242       if (const auto *E = dyn_cast<Expr>(Child))
12243         if (!E->isGLValue())
12244           continue;
12245       if (Visit(Child))
12246         return true;
12247     }
12248     return false;
12249   }
12250   explicit LastprivateConditionalRefChecker(
12251       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12252       : LPM(LPM) {}
12253   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12254   getFoundData() const {
12255     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12256   }
12257 };
12258 } // namespace
12259 
/// Emits the update of the "last value" copy of a lastprivate conditional
/// variable:
/// \code
///   #pragma omp critical(<UniqueDeclName>)
///   if (last_iv <= iv) {
///     last_iv = iv;
///     last_a = priv_a;
///   }
/// \endcode
/// With OpenMPSimd enabled the same code is emitted without the critical
/// region.
/// \param IVLVal LValue of the loop counter; its current value is loaded as
/// `iv` and its type selects a signed or unsigned comparison.
/// \param UniqueDeclName Name of the internal variable that holds the last
/// value (`last_a`); the name of `last_iv` is derived from it with an "iv"
/// component. It is also used as the name of the critical region.
/// \param LVal LValue of the private copy (`priv_a`); must be of scalar or
/// complex evaluation kind.
/// \param Loc Source location passed to the emitted loads and to the critical
/// region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  // The internal variable takes the alignment of the loop counter lvalue.
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  // The internal variable takes the alignment of the private copy.
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Loaded once here, outside of the region body emitted below.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned depending on the counter type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Copy according to the evaluation kind of the variable's type.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded right
    // away; if ApplyDebugLocation only applies for the lifetime of that
    // object, this statement does not affect the EmitBlock below - confirm.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12346 
12347 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12348                                                          const Expr *LHS) {
12349   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12350     return;
12351   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12352   if (!Checker.Visit(LHS))
12353     return;
12354   const Expr *FoundE;
12355   const Decl *FoundD;
12356   StringRef UniqueDeclName;
12357   LValue IVLVal;
12358   llvm::Function *FoundFn;
12359   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12360       Checker.getFoundData();
12361   if (FoundFn != CGF.CurFn) {
12362     // Special codegen for inner parallel regions.
12363     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12364     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12365     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12366            "Lastprivate conditional is not found in outer region.");
12367     QualType StructTy = std::get<0>(It->getSecond());
12368     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12369     LValue PrivLVal = CGF.EmitLValue(FoundE);
12370     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12371         PrivLVal.getAddress(CGF),
12372         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12373     LValue BaseLVal =
12374         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12375     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12376     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12377                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12378                         FiredLVal, llvm::AtomicOrdering::Unordered,
12379                         /*IsVolatile=*/true, /*isInit=*/false);
12380     return;
12381   }
12382 
12383   // Private address of the lastprivate conditional in the current context.
12384   // priv_a
12385   LValue LVal = CGF.EmitLValue(FoundE);
12386   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12387                                    FoundE->getExprLoc());
12388 }
12389 
/// For every lastprivate conditional variable of the innermost enabled stack
/// entry that is captured by the last capture region of directive \p D and is
/// not listed in \p IgnoredDecls, emits:
/// \code
///   if (priv_a.Fired != 0) { <lastprivate conditional update of a> }
/// \endcode
/// Does nothing for OpenMP < 50, when no variables are tracked, or when the
/// innermost enabled entry belongs to a function other than the one being
/// emitted by \p CGF.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost entry that is not disabled.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Only variables captured by the last capture region are checked.
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    // Tuple element 3 is the helper record lvalue, element 2 its flag field.
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    // Reference-typed variables are loaded through the reference first.
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
12439 
12440 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12441     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12442     SourceLocation Loc) {
12443   if (CGF.getLangOpts().OpenMP < 50)
12444     return;
12445   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12446   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12447          "Unknown lastprivate conditional variable.");
12448   StringRef UniqueName = It->second;
12449   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12450   // The variable was not updated in the region - exit.
12451   if (!GV)
12452     return;
12453   LValue LPLVal = CGF.MakeAddrLValue(
12454       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12455   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12456   CGF.EmitStoreOfScalar(Res, PrivLVal);
12457 }
12458 
/// Stub: emitParallelOutlinedFunction is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12464 
/// Stub: emitTeamsOutlinedFunction is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12470 
/// Stub: emitTaskOutlinedFunction is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored
/// (\p NumberOfParts is not written) and the body is a single
/// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12478 
/// Stub: emitParallelCall is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12486 
/// Stub: emitCriticalRegion is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12493 
/// Stub: emitMasterRegion is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12499 
/// Stub: emitTaskyieldCall is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12504 
/// Stub: emitTaskgroupRegion is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12510 
/// Stub: emitSingleRegion is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12518 
/// Stub: emitOrderedRegion is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12525 
/// Stub: emitBarrierCall is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12533 
/// Stub: emitForDispatchInit is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12540 
/// Stub: emitForStaticInit is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12546 
/// Stub: emitDistributeStaticInit is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12552 
/// Stub: emitForOrderedIterationEnd is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12559 
/// Stub: emitForStaticFinish is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12565 
/// Stub: emitForNext is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12573 
/// Stub: emitNumThreadsClause is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12579 
/// Stub: emitProcBindClause is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12585 
/// Stub: getAddrOfThreadPrivate is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12592 
/// Stub: emitThreadPrivateVarDefinition is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12598 
/// Stub: getAddrOfArtificialThreadPrivate is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12603 
/// Stub: emitFlush is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12610 
/// Stub: emitTaskCall is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12619 
/// Stub: emitTaskLoopCall is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12626 
/// Emits a reduction in SIMD-only mode by forwarding every argument unchanged
/// to CGOpenMPRuntime::emitReduction.
/// Only simple reductions are expected here: \p Options.SimpleReduction must
/// be set, which is checked with an assertion.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12635 
/// Stub: emitTaskReductionInit is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12641 
/// Stub: emitTaskReductionFini is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12647 
/// Stub: emitTaskReductionFixups is not supported in SIMD-only mode.
/// Control must never reach this function; all arguments are ignored and the
/// body is a single llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12654 
12655 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12656                                                   SourceLocation Loc,
12657                                                   llvm::Value *ReductionsPtr,
12658                                                   LValue SharedLVal) {
12659   llvm_unreachable("Not supported in SIMD-only mode");
12660 }
12661 
12662 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12663                                            SourceLocation Loc) {
12664   llvm_unreachable("Not supported in SIMD-only mode");
12665 }
12666 
12667 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12668     CodeGenFunction &CGF, SourceLocation Loc,
12669     OpenMPDirectiveKind CancelRegion) {
12670   llvm_unreachable("Not supported in SIMD-only mode");
12671 }
12672 
12673 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12674                                          SourceLocation Loc, const Expr *IfCond,
12675                                          OpenMPDirectiveKind CancelRegion) {
12676   llvm_unreachable("Not supported in SIMD-only mode");
12677 }
12678 
12679 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12680     const OMPExecutableDirective &D, StringRef ParentName,
12681     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12682     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12683   llvm_unreachable("Not supported in SIMD-only mode");
12684 }
12685 
12686 void CGOpenMPSIMDRuntime::emitTargetCall(
12687     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12688     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12689     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12690     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12691                                      const OMPLoopDirective &D)>
12692         SizeEmitter) {
12693   llvm_unreachable("Not supported in SIMD-only mode");
12694 }
12695 
12696 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12697   llvm_unreachable("Not supported in SIMD-only mode");
12698 }
12699 
12700 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12701   llvm_unreachable("Not supported in SIMD-only mode");
12702 }
12703 
12704 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12705   return false;
12706 }
12707 
12708 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12709                                         const OMPExecutableDirective &D,
12710                                         SourceLocation Loc,
12711                                         llvm::Function *OutlinedFn,
12712                                         ArrayRef<llvm::Value *> CapturedVars) {
12713   llvm_unreachable("Not supported in SIMD-only mode");
12714 }
12715 
12716 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12717                                              const Expr *NumTeams,
12718                                              const Expr *ThreadLimit,
12719                                              SourceLocation Loc) {
12720   llvm_unreachable("Not supported in SIMD-only mode");
12721 }
12722 
12723 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12724     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12725     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12726   llvm_unreachable("Not supported in SIMD-only mode");
12727 }
12728 
12729 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12730     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12731     const Expr *Device) {
12732   llvm_unreachable("Not supported in SIMD-only mode");
12733 }
12734 
12735 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12736                                            const OMPLoopDirective &D,
12737                                            ArrayRef<Expr *> NumIterations) {
12738   llvm_unreachable("Not supported in SIMD-only mode");
12739 }
12740 
12741 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12742                                               const OMPDependClause *C) {
12743   llvm_unreachable("Not supported in SIMD-only mode");
12744 }
12745 
12746 const VarDecl *
12747 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12748                                         const VarDecl *NativeParam) const {
12749   llvm_unreachable("Not supported in SIMD-only mode");
12750 }
12751 
12752 Address
12753 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12754                                          const VarDecl *NativeParam,
12755                                          const VarDecl *TargetParam) const {
12756   llvm_unreachable("Not supported in SIMD-only mode");
12757 }
12758