1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Installed as CodeGenFunction::CapturedStmtInfo while an OpenMP construct
/// is being emitted; derived classes specialize how captures, the thread-id
/// variable, and the helper function name are resolved.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that outline a captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement of their own
  /// (used by inlined regions, which delegate to an enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the construct may be exited via an OpenMP 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: any CR_OpenMP captured-stmt info is one of ours.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
108 
/// API for captured statement code generation in OpenMP constructs.
/// Used for 'parallel' (and similar) regions that are outlined into a helper
/// function receiving the global thread id as a parameter.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};
141 
/// API for captured statement code generation in OpenMP constructs.
/// Used for outlined 'task' regions; also carries the action that implements
/// switch-based resumption for untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing untied-task scheduling: the task body is
  /// split into parts, and a part-id variable records where execution must
  /// resume when the task function is re-entered.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Variable (accessed through a pointer parameter) holding the current
    /// part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence to run just before switching to the next task part.
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch over the part id; cases are appended lazily as new
    /// switching points are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unrecognized part ids fall through to the done block, which exits
        // the task function through the normal cleanup path.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a resumption point: store the next part id, run the user-supplied
    /// untied codegen, leave the task function, and register the continuation
    /// block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // getNumCases() is the id the new case below will receive.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing region info (if any);
/// when there is no enclosing OpenMP region the query either returns a
/// "use the original declaration" answer or is a hard error.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Deliberately consults the raw old CSI (which may be a non-OpenMP
    // captured-stmt info), not the OuterRegionInfo member; the local
    // intentionally shadows it.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-stmt info that was active before this inlined region was
  /// entered; restored by InlinedOpenMPRegionRAII on exit.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
313 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name for the target region.
  StringRef HelperName;
};
342 
/// Placeholder codegen callback for regions that only evaluate expressions;
/// reaching it at runtime indicates a logic error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region. Privatizes global variables captured by the statement so they can
/// be evaluated as if they were local; body/thread-id/helper-name queries are
/// all errors because only expressions are emitted in this mode.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // Not found in an enclosing region: the variable was privatized in the
    // constructor, so the original declaration can be used directly.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
405 
/// RAII for emitting code of OpenMP constructs.
/// On construction it installs a CGOpenMPInlinedRegionInfo and clears
/// lambda/block capture state on the CodeGenFunction; the destructor restores
/// everything, so nested regions do not see the enclosing captures.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved CodeGenFunction state, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions. Each OMP_ord_* value equals the
  /// corresponding OMP_sch_* value plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region. Wraps a PrePostActionTy as an EH-stack cleanup so its Exit hook
/// runs on both normal and exceptional region exit.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the builder has no insertion point (dead code).
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
584 
585 } // anonymous namespace
586 
/// Run the stored codegen callback inside its own cleanup scope.
/// If a pre/post action is attached, its Exit hook is additionally pushed as
/// a normal-and-EH cleanup so it fires even on exceptional paths; otherwise a
/// default (no-op) action is passed to the callback.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
/// Emit initialization of \p Private from the initializer of a user-defined
/// reduction \p DRD.
/// If the UDR has an explicit initializer, the initializer call \p InitOp is
/// replayed with its LHS/RHS placeholders privatized to \p Private and
/// \p Original respectively. Otherwise the private copy is zero-initialized
/// from a null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The initializer is modeled as a call with two address-of arguments:
    // arg 0 is the private (LHS) copy, arg 1 the original (RHS) value.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the placeholder decls onto the actual private/original storage.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a null constant of the element
    // type in a private global and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
662 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, \p Init is a UDR combiner call and
/// each element is initialized via the declare-reduction initializer.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl used for element init (may be null).
/// \param SrcAddr Address of the original array (only needed when \p DRD is
/// non-null, since the UDR initializer reads the original element).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // Source must be viewed with the same element type as the destination.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per element so element-level cleanups run each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751 
// Emit the lvalue for the shared (original) expression of a reduction item;
// thin wrapper over CodeGenFunction's shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
// Emit the initialization of an array-typed private reduction copy by
// delegating to the element-wise EmitOMPAggregateInit loop.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one exists, or when the
  // private copy carries no initializer of its own; otherwise fall back to
  // the private variable's initializer expression.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
// Compute Sizes[N] = (size in chars, number of elements) for reduction item
// N and, for variably-modified private types, emit the VLA type with the
// computed dynamic extent.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: record the static size; the element-count slot is
    // unused (nullptr).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // NOTE: relies on typed pointers (getElementType of the pointer type) —
  // predates LLVM's opaque-pointer transition.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1 (inclusive upper bound);
    // scale by the element size to get the size in chars.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole (variably-modified) item: total size in chars is available;
    // derive the element count by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably-modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
850 
// Re-emit the variably-modified private type of item N with an externally
// supplied element count \p Size (instead of computing one as the other
// overload does). No-op for items of constant size.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while the type is emitted; the
  // mapping lives for the duration of EmitVariablyModifiedType.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
869 
// Emit the initialization of the private copy of reduction item N.
// Three cases, in order:
//  * array-typed privates  -> element-wise aggregate initialization;
//  * declare-reduction with an initializer (or a private with no init of
//    its own)              -> the user-defined reduction initializer;
//  * otherwise             -> the private variable's own initializer, but
//    only if DefaultInit did not already handle it and the initializer is
//    non-trivial.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses to the memory representations of their AST types
  // before emitting any stores through them.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
// Chase pointer/reference indirections starting from BaseLV until the
// pointee type matches ElTy, then return an lvalue whose address is re-typed
// to ElTy's memory representation. Used to reach the beginning of a
// reduction item through an arbitrary chain of indirections.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load through it as a reference lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Keep the final lvalue's own type/base-info/TBAA; only the address is
  // re-typed to ElTy.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
944 
// Inverse of loadToBegin: given the raw address Addr of a private copy,
// materialize a chain of stack temporaries mirroring the pointer/reference
// nesting of BaseTy, so that loading through the chain reaches Addr. Returns
// the outermost temporary, or Addr itself when BaseTy has no indirection.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temp created so far
  Address TopTmp = Address::invalid();     // previous level of the chain
  Address MostTopTmp = Address::invalid(); // outermost temp (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the private address into the innermost temp and return the head
    // of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
// Adjust the private copy's address so it can be addressed through the same
// base expression as the shared variable. For array sections/subscripts the
// private buffer corresponds to the item's first element, so the adjusted
// base is private-ptr plus the (base - first-element) pointer difference of
// the shared side.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Address of the first shared element, reached through any pointer /
    // reference indirections of the base declaration.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Signed element offset from the item's first element back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in temporaries that mirror the original
    // base's indirection structure.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
// The thread id is passed into outlined regions behind a pointer
// (kmp_int32 *); load through it to form an lvalue for the id itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1030 
// Emit the region body inside a terminate scope: an exception escaping the
// structured block calls std::terminate rather than unwinding out of it.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1043 
// In task regions the thread id variable is a direct local (not behind a
// pointer as in parallel regions), so no load is needed — just form an
// lvalue over the local variable.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1050 
// Append an unnamed, public, non-bitfield member of type FieldTy to the
// record DC. Used to assemble implicit record types matching runtime ABI
// layouts.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
1061 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is modeled as an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pick up any offloading entry info recorded in module metadata.
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emit the outlined helper for a 'declare reduction' combiner or
/// initializer: void .omp_combiner./.omp_initializer.(Ty *out, Ty *in).
/// \param CombinerInitializer the combiner expression, or the initializer
///        expression for call-style initializers; may be null, in which case
///        only Out's own initializer (if any, non-trivial) is emitted.
/// \param In, Out the variables the expression refers to (omp_in/omp_out or
///        omp_orig/omp_priv); they are privatized to point at the parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small thunks; force-inline them when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a call-style expression, run the out
  // variable's own (non-trivial) initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
// Emit (once) the combiner and, if present, the initializer function for a
// 'declare reduction' decl and cache both in UDRMap. When emitted from
// within a function, D is also recorded in FunctionUDRMap for that function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the initializer expression down;
    // direct-init is handled via the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
// Pushes a FinalizationInfo for OMPD_parallel on construction and pops it in
// the destructor; a no-op when no OpenMPIRBuilder is supplied.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // The callback captures CGF by reference; it is unregistered in the
    // destructor below, so it cannot run after this RAII object's scope.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
// Outline the body of a 'parallel' directive using the OMPD_parallel
// captured statement.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1281 
// Outline the body of a 'teams' directive using the OMPD_teams captured
// statement; same mechanism as the parallel case.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1289 
// Outline the body of a task/taskloop directive. For untied tasks, an
// action is installed whose codegen re-enqueues the task via
// __kmpc_omp_task, and NumberOfParts reports how many task parts the action
// produced.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen for untied continuations: load the kmp_task_t pointer and call
  // __kmpc_omp_task to re-schedule the task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // taskloop variants use the OMPD_taskloop captured region, plain tasks
  // use OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
// Create a global variable of record type Ty whose initializer is built
// field-by-field from Data (padding handled by buildStructValue). Trailing
// arguments are forwarded to finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1369 
// Build a constant struct of record type Ty from Data nested inside an
// enclosing aggregate builder (Parent), instead of as a standalone global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1381 
// Create the "service insert point" for the current function: a no-op
// bitcast of undef used purely as a stable insertion marker for thread-id /
// ident setup code. Placed either at the current insertion point or right
// after the function's alloca insert point; it is erased again by
// clearLocThreadIdInsertPt.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
// Build the ";file;function;line;column;;" source-location string used for
// OpenMP ident structures. The returned StringRef points into the
// caller-provided Buffer, which must outlive the reference.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  // The function-name field is left empty when there is no current
  // FunctionDecl.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Return the OpenMP global thread id for the current function, reusing a
/// cached value, the outlined region's thread-id parameter, or a call to
/// __kmpc_global_thread_num emitted at the service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only read the parameter when it is safe w.r.t. exceptional control
      // flow: either no landing pad is required, or the load happens in the
      // entry block / the same block as the parameter's address computation.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point (near function entry) so the
  // cached value dominates all later uses; restore the builder afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
/// Return ident_t*, the type of the location descriptor passed to __kmpc_*
/// entry points; the underlying type is maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1652     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1653     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1654     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1655       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1656           << PLoc.getFilename() << EC.message();
1657   }
1658 
1659   DeviceID = ID.getDevice();
1660   FileID = ID.getFile();
1661   LineNum = PLoc.getLine();
1662 }
1663 
/// For a 'declare target link' variable (or a 'to' variable under unified
/// shared memory), return the address of the generated reference pointer
/// "<mangled>[_<fileid>]_decl_tgt_ref_ptr" used to access the variable;
/// return an invalid Address in all other cases.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no offloading machinery is emitted.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // For non-externally-visible variables, mix the file ID into the name;
      // presumably to disambiguate identical local names across translation
      // units -- the unique-info helper supplies it.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the reference pointer lazily, once per module.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, initialize the pointer with the variable's own address;
      // on the device the initializer is left as created.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1702 
1703 llvm::Constant *
1704 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1705   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1706          !CGM.getContext().getTargetInfo().isTLSSupported());
1707   // Lookup the entry, lazily creating it if necessary.
1708   std::string Suffix = getName({"cache", ""});
1709   return getOrCreateInternalVariable(
1710       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1711 }
1712 
1713 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1714                                                 const VarDecl *VD,
1715                                                 Address VDAddr,
1716                                                 SourceLocation Loc) {
1717   if (CGM.getLangOpts().OpenMPUseTLS &&
1718       CGM.getContext().getTargetInfo().isTLSSupported())
1719     return VDAddr;
1720 
1721   llvm::Type *VarTy = VDAddr.getElementType();
1722   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1723                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1724                                                        CGM.Int8PtrTy),
1725                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1726                          getOrCreateThreadPrivateCache(VD)};
1727   return Address(CGF.EmitRuntimeCall(
1728                      OMPBuilder.getOrCreateRuntimeFunction(
1729                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1730                      Args),
1731                  VDAddr.getAlignment());
1732 }
1733 
1734 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1735     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1736     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1737   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1738   // library.
1739   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1740   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1741                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1742                       OMPLoc);
1743   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1744   // to register constructor/destructor for variable.
1745   llvm::Value *Args[] = {
1746       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1747       Ctor, CopyCtor, Dtor};
1748   CGF.EmitRuntimeCall(
1749       OMPBuilder.getOrCreateRuntimeFunction(
1750           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1751       Args);
1752 }
1753 
/// Emit registration of constructor/destructor support for the threadprivate
/// variable \p VD. When \p CGF is null and an init is needed, a standalone
/// "__omp_threadprivate_init_" function is synthesized and returned;
/// otherwise the registration is emitted into \p CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS there is nothing to register with the runtime.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature:
      //   void *__kmpc_global_ctor_(void *dst)
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and emit the initializer into
      // it, reinterpreted as the variable's memory type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Signature:
      //   void __kmpc_global_dtor_(void *obj)
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No ctor needed: pass a typed null pointer to the runtime.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No dtor needed: pass a typed null pointer to the runtime.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // Called outside any function: synthesize a dedicated init function
      // that performs the runtime registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1873 
/// Emit constructor/destructor offload entries for a 'declare target'
/// variable definition. Returns true when compiling for the device (telling
/// the caller the host-side handling is done elsewhere), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without any offload targets and outside device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Skip non-declare-target variables and those accessed through the
  // reference pointer (link, or to + unified shared memory).
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit at most once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: nothing else references it on the device.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only an ID placeholder is needed for the offload entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive: nothing else references it on the device.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host only an ID placeholder is needed for the offload entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1988 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable named \p Name of type \p VarType, either as a TLS global or via
/// the __kmpc_threadprivate_cached runtime entry.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS support the global itself can be marked thread_local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise obtain the thread's copy from the runtime:
  //   __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns a void*; cast it back to the variable's own type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2019 
/// Emit "if (Cond) { ThenGen(CGF) } else { ElseGen(CGF) }", eliding the dead
/// arm entirely when the condition folds to a constant.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2058 
/// Emit the runtime calls for an OpenMP 'parallel' region.
///
/// The "then" path emits __kmpc_fork_call(loc, n, microtask, captured...),
/// handing \p OutlinedFn to the runtime as the microtask. The "else" path
/// (taken when an if-clause evaluates to false) runs the region serially:
/// __kmpc_serialized_parallel, a direct call of \p OutlinedFn with a
/// zeroed bound thread-id, then __kmpc_end_serialized_parallel. With no
/// \p IfCond, only the fork path is emitted unconditionally.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2128 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Not in an outlined region (or it has no thread-id variable): materialize
  // the thread ID into a fresh signed 32-bit temporary and return its address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
2151 
/// Return (creating it on first request) a module-internal global variable
/// named \p Name of type \p Ty. Created variables are cached in InternalVars;
/// requesting an existing name with a different type is a programming error
/// and is asserted below.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the (possibly multi-part) twine into a stable string to use as the
  // cache key and the global's name.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request for this name: create a common-linkage, zero-initialized
  // global so that multiple translation units end up sharing one definition.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2171 
2172 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2173   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2174   std::string Name = getName({Prefix, "var"});
2175   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2176 }
2177 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() calls \p EnterCallee with \p EnterArgs; Exit() calls \p ExitCallee
/// with \p ExitArgs. When \p Conditional is set, the region body is guarded on
/// a non-zero result of the enter call (e.g. __kmpc_master / __kmpc_single),
/// and the caller must invoke Done() after emitting the region to close the
/// conditional block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block for the Conditional case; set by Enter(), consumed by
  // Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only emit the region body when the runtime enter call returned
      // non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional region opened by Enter(); only valid when the
  /// action was constructed with Conditional=true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2216 
/// Emit an OpenMP 'critical' region guarded by the named lock, optionally
/// passing a 'hint' clause value to the runtime.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint expression is evaluated and truncated/extended to uint32 for
    // the runtime call.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  // The end call always takes the plain three arguments, even when the enter
  // call carried a hint.
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2246 
/// Emit an OpenMP 'master' region: the body only runs when __kmpc_master
/// returns non-zero, and is closed with __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by the Conditional action.
  Action.Done(CGF);
}
2269 
/// Emit a task-yield scheduling point, either through the OpenMPIRBuilder or
/// via a direct call to __kmpc_omp_taskyield.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Inside an OpenMP region, also emit the switch that lets an untied task
  // resume after this scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2289 
/// Emit an OpenMP 'taskgroup' region bracketed by __kmpc_taskgroup /
/// __kmpc_end_taskgroup runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2309 
/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// The array holds untyped pointers, so the loaded element is rebuilt into a
/// typed address using \p Var's declared alignment and memory type.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the raw pointer as a pointer to the variable's memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}
2323 
/// Emit the helper function handed to __kmpc_copyprivate. It has the shape
///   void copy_func(void *LHSArg, void *RHSArg);
/// where both arguments are pointers to arrays (of type \p ArgsType) of
/// untyped pointers to the variables. For each copyprivate variable it
/// performs the copy described by the corresponding entry of
/// \p AssignmentOps, copying element I of the RHS array into element I of
/// the LHS array.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy is performed with the copyprivate variable's type (which may
    // differ from the projected decls' types, e.g. for references).
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2377 
/// Emit an OpenMP 'single' region, including support for the 'copyprivate'
/// clause: the executing thread records that it ran (did_it), and after the
/// region every thread calls __kmpc_copyprivate to broadcast the listed
/// variables from the executing thread to all others.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed (and only created) when there are copyprivate vars.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional block, so only the thread
    // that executed the single region sets it)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block opened by the Conditional action.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2465 
2466 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2467                                         const RegionCodeGenTy &OrderedOpGen,
2468                                         SourceLocation Loc, bool IsThreads) {
2469   if (!CGF.HaveInsertPoint())
2470     return;
2471   // __kmpc_ordered(ident_t *, gtid);
2472   // OrderedOpGen();
2473   // __kmpc_end_ordered(ident_t *, gtid);
2474   // Prepare arguments and build a call to __kmpc_ordered
2475   if (IsThreads) {
2476     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2477     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2478                               CGM.getModule(), OMPRTL___kmpc_ordered),
2479                           Args,
2480                           OMPBuilder.getOrCreateRuntimeFunction(
2481                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2482                           Args);
2483     OrderedOpGen.setAction(Action);
2484     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2485     return;
2486   }
2487   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2488 }
2489 
2490 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2491   unsigned Flags;
2492   if (Kind == OMPD_for)
2493     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2494   else if (Kind == OMPD_sections)
2495     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2496   else if (Kind == OMPD_single)
2497     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2498   else if (Kind == OMPD_barrier)
2499     Flags = OMP_IDENT_BARRIER_EXPL;
2500   else
2501     Flags = OMP_IDENT_BARRIER_IMPL;
2502   return Flags;
2503 }
2504 
/// Choose the default schedule/chunk for a worksharing loop. Only doacross
/// loops (an 'ordered' clause with a loop count) are special-cased here to
/// schedule(static, 1); for all other loops the outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2522 
/// Emit a barrier. When the OpenMPIRBuilder is enabled the builder handles
/// everything; otherwise a __kmpc_barrier call is emitted, or — inside a
/// cancellable region — a __kmpc_cancel_barrier whose result (optionally,
/// when \p EmitChecks) branches to the construct's cancellation exit.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2572 
/// Map the OpenMP loop schedule to the runtime enumeration.
///
/// \p Chunked selects the *_chunked variant where one exists, and \p Ordered
/// selects the OMP_ord_* family used under an 'ordered' clause.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static (non-chunked).
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
2594 
2595 /// Map the OpenMP distribute schedule to the runtime enumeration.
2596 static OpenMPSchedType
2597 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2598   // only static is allowed for dist_schedule
2599   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2600 }
2601 
2602 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2603                                          bool Chunked) const {
2604   OpenMPSchedType Schedule =
2605       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2606   return Schedule == OMP_sch_static;
2607 }
2608 
2609 bool CGOpenMPRuntime::isStaticNonchunked(
2610     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2611   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2612   return Schedule == OMP_dist_sch_static;
2613 }
2614 
2615 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2616                                       bool Chunked) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619   return Schedule == OMP_sch_static_chunked;
2620 }
2621 
2622 bool CGOpenMPRuntime::isStaticChunked(
2623     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625   return Schedule == OMP_dist_sch_static_chunked;
2626 }
2627 
2628 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2629   OpenMPSchedType Schedule =
2630       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2631   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2632   return Schedule != OMP_sch_static;
2633 }
2634 
2635 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2636                                   OpenMPScheduleClauseModifier M1,
2637                                   OpenMPScheduleClauseModifier M2) {
2638   int Modifier = 0;
2639   switch (M1) {
2640   case OMPC_SCHEDULE_MODIFIER_monotonic:
2641     Modifier = OMP_sch_modifier_monotonic;
2642     break;
2643   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2644     Modifier = OMP_sch_modifier_nonmonotonic;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_simd:
2647     if (Schedule == OMP_sch_static_chunked)
2648       Schedule = OMP_sch_static_balanced_chunked;
2649     break;
2650   case OMPC_SCHEDULE_MODIFIER_last:
2651   case OMPC_SCHEDULE_MODIFIER_unknown:
2652     break;
2653   }
2654   switch (M2) {
2655   case OMPC_SCHEDULE_MODIFIER_monotonic:
2656     Modifier = OMP_sch_modifier_monotonic;
2657     break;
2658   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2659     Modifier = OMP_sch_modifier_nonmonotonic;
2660     break;
2661   case OMPC_SCHEDULE_MODIFIER_simd:
2662     if (Schedule == OMP_sch_static_chunked)
2663       Schedule = OMP_sch_static_balanced_chunked;
2664     break;
2665   case OMPC_SCHEDULE_MODIFIER_last:
2666   case OMPC_SCHEDULE_MODIFIER_unknown:
2667     break;
2668   }
2669   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2670   // If the static schedule kind is specified or if the ordered clause is
2671   // specified, and if the nonmonotonic modifier is not specified, the effect is
2672   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2673   // modifier is specified, the effect is as if the nonmonotonic modifier is
2674   // specified.
2675   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2676     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2677           Schedule == OMP_sch_static_balanced_chunked ||
2678           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2679           Schedule == OMP_dist_sch_static_chunked ||
2680           Schedule == OMP_dist_sch_static))
2681       Modifier = OMP_sch_modifier_nonmonotonic;
2682   }
2683   return Schedule | Modifier;
2684 }
2685 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop, selecting the 4/8-byte signed/unsigned entry point from
/// \p IVSize and \p IVSigned.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must go through the static-init path instead, except
  // under an 'ordered' clause.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2718 
/// Emit the actual call to __kmpc_for_static_init_<size>[u] that initializes
/// a statically scheduled worksharing loop or sections region. The caller has
/// already computed the ident_t location, the thread id, the runtime function
/// variant, and the runtime schedule encoding.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  // Nothing to emit if the insertion point was cleared (unreachable code).
  if (!CGF.HaveInsertPoint())
    return;

  // 'ordered' loops never take the static-init path.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // The schedule encoding and the presence of a chunk must agree.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2767 
/// Emit initialization of a statically scheduled worksharing directive
/// (loop-based or sections-based) via __kmpc_for_static_init.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  // Map the clause schedule kind to the runtime encoding; the chunked
  // variant is selected when a chunk expression is present.
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // The ident_t flags tell the runtime whether this is a loop or a sections
  // construct.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2788 
2789 void CGOpenMPRuntime::emitDistributeStaticInit(
2790     CodeGenFunction &CGF, SourceLocation Loc,
2791     OpenMPDistScheduleClauseKind SchedKind,
2792     const CGOpenMPRuntime::StaticRTInput &Values) {
2793   OpenMPSchedType ScheduleNum =
2794       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2795   llvm::Value *UpdatedLocation =
2796       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2797   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2798   llvm::FunctionCallee StaticInitFunction =
2799       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2800   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2801                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2802                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2803 }
2804 
2805 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2806                                           SourceLocation Loc,
2807                                           OpenMPDirectiveKind DKind) {
2808   if (!CGF.HaveInsertPoint())
2809     return;
2810   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2811   llvm::Value *Args[] = {
2812       emitUpdateLocation(CGF, Loc,
2813                          isOpenMPDistributeDirective(DKind)
2814                              ? OMP_IDENT_WORK_DISTRIBUTE
2815                              : isOpenMPLoopDirective(DKind)
2816                                    ? OMP_IDENT_WORK_LOOP
2817                                    : OMP_IDENT_WORK_SECTIONS),
2818       getThreadID(CGF, Loc)};
2819   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2820   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2821                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2822                       Args);
2823 }
2824 
2825 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2826                                                  SourceLocation Loc,
2827                                                  unsigned IVSize,
2828                                                  bool IVSigned) {
2829   if (!CGF.HaveInsertPoint())
2830     return;
2831   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2832   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2833   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2834 }
2835 
2836 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2837                                           SourceLocation Loc, unsigned IVSize,
2838                                           bool IVSigned, Address IL,
2839                                           Address LB, Address UB,
2840                                           Address ST) {
2841   // Call __kmpc_dispatch_next(
2842   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2843   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2844   //          kmp_int[32|64] *p_stride);
2845   llvm::Value *Args[] = {
2846       emitUpdateLocation(CGF, Loc),
2847       getThreadID(CGF, Loc),
2848       IL.getPointer(), // &isLastIter
2849       LB.getPointer(), // &Lower
2850       UB.getPointer(), // &Upper
2851       ST.getPointer()  // &Stride
2852   };
2853   llvm::Value *Call =
2854       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2855   return CGF.EmitScalarConversion(
2856       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2857       CGF.getContext().BoolTy, Loc);
2858 }
2859 
2860 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2861                                            llvm::Value *NumThreads,
2862                                            SourceLocation Loc) {
2863   if (!CGF.HaveInsertPoint())
2864     return;
2865   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2866   llvm::Value *Args[] = {
2867       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2868       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2869   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2870                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2871                       Args);
2872 }
2873 
2874 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2875                                          ProcBindKind ProcBind,
2876                                          SourceLocation Loc) {
2877   if (!CGF.HaveInsertPoint())
2878     return;
2879   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2880   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2881   llvm::Value *Args[] = {
2882       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2883       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2884   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2886                       Args);
2887 }
2888 
2889 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2890                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2891   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2892     OMPBuilder.createFlush(CGF.Builder);
2893   } else {
2894     if (!CGF.HaveInsertPoint())
2895       return;
2896     // Build call void __kmpc_flush(ident_t *loc)
2897     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2898                             CGM.getModule(), OMPRTL___kmpc_flush),
2899                         emitUpdateLocation(CGF, Loc));
2900   }
2901 }
2902 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these indices presumably mirror the field order of the
/// kmp_task_t record built elsewhere in this file - keep the two in sync.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2928 
2929 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2930   return OffloadEntriesTargetRegion.empty() &&
2931          OffloadEntriesDeviceGlobalVar.empty();
2932 }
2933 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) keyed by
/// device/file/parent-function/line, to be completed later by
/// registerTargetRegionEntryInfo().
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
2947 
/// Register the address/ID of an emitted target region: completes an existing
/// entry on the device side, or creates a fresh one on the host side.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                      OffloadingEntriesNum);
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // On the host, silently ignore a duplicate registration of a plain
    // target region at the same location.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
2978 
2979 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2980     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2981     bool IgnoreAddressId) const {
2982   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2983   if (PerDevice == OffloadEntriesTargetRegion.end())
2984     return false;
2985   auto PerFile = PerDevice->second.find(FileID);
2986   if (PerFile == PerDevice->second.end())
2987     return false;
2988   auto PerParentName = PerFile->second.find(ParentName);
2989   if (PerParentName == PerFile->second.end())
2990     return false;
2991   auto PerLine = PerParentName->second.find(LineNum);
2992   if (PerLine == PerParentName->second.end())
2993     return false;
2994   // Fail if this entry is already registered.
2995   if (!IgnoreAddressId &&
2996       (PerLine->second.getAddress() || PerLine->second.getID()))
2997     return false;
2998   return true;
2999 }
3000 
3001 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3002     const OffloadTargetRegionEntryInfoActTy &Action) {
3003   // Scan all target region entries and perform the provided action.
3004   for (const auto &D : OffloadEntriesTargetRegion)
3005     for (const auto &F : D.second)
3006       for (const auto &P : F.second)
3007         for (const auto &L : P.second)
3008           Action(D.first, F.first, P.first(), L.first, L.second);
3009 }
3010 
/// Initialize a declare-target global variable entry (name + flags + order)
/// with no address/size yet; completed later by
/// registerDeviceGlobalVarEntryInfo().
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3021 
/// Register the address, size and linkage of a declare-target global
/// variable: completes an existing entry on the device side, or creates a
/// fresh one on the host side.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // An already-addressed entry only needs its size/linkage filled in if
    // they were previously unknown.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host side: update a pre-existing entry in place, otherwise create one.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3062 
3063 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3064     actOnDeviceGlobalVarEntriesInfo(
3065         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3066   // Scan all target region entries and perform the provided action.
3067   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3068     Action(E.getKey(), E.getValue());
3069 }
3070 
/// Emit one __tgt_offload_entry global describing an offloaded region or a
/// declare-target variable, placed in the section where the linker collects
/// the offload entry table.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field order must match the record built by getTgtOffloadEntryQTy():
  // { void *addr; char *name; size_t size; int32_t flags; int32_t reserved; }
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3101 
/// Emit the __tgt_offload_entry globals for all registered offload entries
/// and record the 'omp_offload.info' metadata the device compilation reads
/// back via loadOffloadInfoMetadata().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the emitter
  // lambdas below, then walked in order to emit the actual entry globals.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the source location matching the device/file IDs so that
        // later diagnostics can point at the original target region.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now that all entries are collected in creation order, emit the actual
  // __tgt_offload_entry globals (or diagnose incomplete entries).
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3275 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load without a host IR file.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host module into a local context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode integer/string operands of a metadata tuple.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout matches the
    // emitter in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3344 
3345 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3346   if (!KmpRoutineEntryPtrTy) {
3347     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3348     ASTContext &C = CGM.getContext();
3349     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3350     FunctionProtoType::ExtProtoInfo EPI;
3351     KmpRoutineEntryPtrQTy = C.getPointerType(
3352         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3353     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3354   }
3355 }
3356 
/// Lazily build and cache the QualType for __tgt_offload_entry; field order
/// must stay in sync with the initializer built in createOffloadEntry().
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);                  // addr
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));   // name
    addFieldToRecordDecl(C, RD, C.getSizeType());              // size
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Packed so the in-memory layout matches the runtime's expectation.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}
3385 
namespace {
/// Descriptor for one privatized entity in a task-based directive: the
/// original variable/expression, its private copy and, when present, the
/// variable used while emitting element-wise initialization.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for task-local variables: only the original declaration is
  /// recorded; the other members stay null (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry was built via the single-argument constructor,
  /// i.e. it describes a local variable privatized in the task.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// A privatized entity paired with the alignment of its storage slot.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3403 
3404 static bool isAllocatableDecl(const VarDecl *VD) {
3405   const VarDecl *CVD = VD->getCanonicalDecl();
3406   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3407     return false;
3408   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3409   // Use the default allocation.
3410   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3411             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3412            !AA->getAllocator());
3413 }
3414 
3415 static RecordDecl *
3416 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3417   if (!Privates.empty()) {
3418     ASTContext &C = CGM.getContext();
3419     // Build struct .kmp_privates_t. {
3420     //         /*  private vars  */
3421     //       };
3422     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3423     RD->startDefinition();
3424     for (const auto &Pair : Privates) {
3425       const VarDecl *VD = Pair.second.Original;
3426       QualType Type = VD->getType().getNonReferenceType();
3427       // If the private variable is a local variable with lvalue ref type,
3428       // allocate the pointer instead of the pointee type.
3429       if (Pair.second.isLocalPrivate()) {
3430         if (VD->getType()->isLValueReferenceType())
3431           Type = C.getPointerType(Type);
3432         if (isAllocatableDecl(VD))
3433           Type = C.getPointerType(Type);
3434       }
3435       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3436       if (VD->hasAttrs()) {
3437         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3438              E(VD->getAttrs().end());
3439              I != E; ++I)
3440           FD->addAttr(*I);
3441       }
3442     }
3443     RD->completeDefinition();
3444     return RD;
3445   }
3446   return nullptr;
3447 }
3448 
3449 static RecordDecl *
3450 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3451                          QualType KmpInt32Ty,
3452                          QualType KmpRoutineEntryPointerQTy) {
3453   ASTContext &C = CGM.getContext();
3454   // Build struct kmp_task_t {
3455   //         void *              shareds;
3456   //         kmp_routine_entry_t routine;
3457   //         kmp_int32           part_id;
3458   //         kmp_cmplrdata_t data1;
3459   //         kmp_cmplrdata_t data2;
3460   // For taskloops additional fields:
3461   //         kmp_uint64          lb;
3462   //         kmp_uint64          ub;
3463   //         kmp_int64           st;
3464   //         kmp_int32           liter;
3465   //         void *              reductions;
3466   //       };
3467   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3468   UD->startDefinition();
3469   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3470   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3471   UD->completeDefinition();
3472   QualType KmpCmplrdataTy = C.getRecordType(UD);
3473   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3474   RD->startDefinition();
3475   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3476   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3477   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3478   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3479   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3480   if (isOpenMPTaskLoopDirective(Kind)) {
3481     QualType KmpUInt64Ty =
3482         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3483     QualType KmpInt64Ty =
3484         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3485     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3486     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3487     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3488     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3489     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3490   }
3491   RD->completeDefinition();
3492   return RD;
3493 }
3494 
3495 static RecordDecl *
3496 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3497                                      ArrayRef<PrivateDataTy> Privates) {
3498   ASTContext &C = CGM.getContext();
3499   // Build struct kmp_task_t_with_privates {
3500   //         kmp_task_t task_data;
3501   //         .kmp_privates_t. privates;
3502   //       };
3503   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3504   RD->startDefinition();
3505   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3506   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3507     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3508   RD->completeDefinition();
3509   return RD;
3510 }
3511 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the signature:
  // kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task argument; the leading field of
  // kmp_task_t_with_privates is the kmp_task_t descriptor (Base).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address rather than by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->shareds and cast it to the expected shareds pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass &tt->privates when a privates record exists; a null void* otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop outlined functions.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop outlined functions additionally receive the loop bounds, stride,
  // last-iteration flag and reductions pointer loaded from the descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0 (see the \code block above).
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3626 
/// Emit the ".omp_task_destructor." function which runs the destructors for
/// every field of the task's privates record whose type requires non-trivial
/// destruction. Its signature mirrors the task entry proxy:
/// kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument and step to its privates record (the
  // second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each field with non-trivial destruction; the
  // cleanups run when the function is finished below.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3675 
3676 /// Emit a privates mapping function for correct handling of private and
3677 /// firstprivate variables.
3678 /// \code
3679 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3680 /// **noalias priv1,...,  <tyn> **noalias privn) {
3681 ///   *priv1 = &.privates.priv1;
3682 ///   ...;
3683 ///   *privn = &.privates.privn;
3684 /// }
3685 /// \endcode
3686 static llvm::Value *
3687 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3688                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3689                                ArrayRef<PrivateDataTy> Privates) {
3690   ASTContext &C = CGM.getContext();
3691   FunctionArgList Args;
3692   ImplicitParamDecl TaskPrivatesArg(
3693       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3694       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3695       ImplicitParamDecl::Other);
3696   Args.push_back(&TaskPrivatesArg);
3697   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3698   unsigned Counter = 1;
3699   for (const Expr *E : Data.PrivateVars) {
3700     Args.push_back(ImplicitParamDecl::Create(
3701         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3702         C.getPointerType(C.getPointerType(E->getType()))
3703             .withConst()
3704             .withRestrict(),
3705         ImplicitParamDecl::Other));
3706     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3707     PrivateVarsPos[VD] = Counter;
3708     ++Counter;
3709   }
3710   for (const Expr *E : Data.FirstprivateVars) {
3711     Args.push_back(ImplicitParamDecl::Create(
3712         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3713         C.getPointerType(C.getPointerType(E->getType()))
3714             .withConst()
3715             .withRestrict(),
3716         ImplicitParamDecl::Other));
3717     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3718     PrivateVarsPos[VD] = Counter;
3719     ++Counter;
3720   }
3721   for (const Expr *E : Data.LastprivateVars) {
3722     Args.push_back(ImplicitParamDecl::Create(
3723         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3724         C.getPointerType(C.getPointerType(E->getType()))
3725             .withConst()
3726             .withRestrict(),
3727         ImplicitParamDecl::Other));
3728     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3729     PrivateVarsPos[VD] = Counter;
3730     ++Counter;
3731   }
3732   for (const VarDecl *VD : Data.PrivateLocals) {
3733     QualType Ty = VD->getType().getNonReferenceType();
3734     if (VD->getType()->isLValueReferenceType())
3735       Ty = C.getPointerType(Ty);
3736     if (isAllocatableDecl(VD))
3737       Ty = C.getPointerType(Ty);
3738     Args.push_back(ImplicitParamDecl::Create(
3739         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3740         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3741         ImplicitParamDecl::Other));
3742     PrivateVarsPos[VD] = Counter;
3743     ++Counter;
3744   }
3745   const auto &TaskPrivatesMapFnInfo =
3746       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3747   llvm::FunctionType *TaskPrivatesMapTy =
3748       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3749   std::string Name =
3750       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3751   auto *TaskPrivatesMap = llvm::Function::Create(
3752       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3753       &CGM.getModule());
3754   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3755                                     TaskPrivatesMapFnInfo);
3756   if (CGM.getLangOpts().Optimize) {
3757     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3758     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3759     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3760   }
3761   CodeGenFunction CGF(CGM);
3762   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3763                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3764 
3765   // *privi = &.privates.privi;
3766   LValue Base = CGF.EmitLoadOfPointerLValue(
3767       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3768       TaskPrivatesArg.getType()->castAs<PointerType>());
3769   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3770   Counter = 0;
3771   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3772     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3773     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3774     LValue RefLVal =
3775         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3776     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3777         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3778     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3779     ++Counter;
3780   }
3781   CGF.FinishFunction();
3782   return TaskPrivatesMap;
3783 }
3784 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the captured shareds block; may be
/// invalid when there is nothing to read from it.
/// \param TDBase LValue of the kmp_task_t_with_privates instance.
/// \param ForDup true when called from the task_dup function (taskloops),
/// false when initializing the initial task object.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement of the directive: taskloop-based directives
  // capture under OMPD_taskloop, everything else under OMPD_task.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path (ForDup) only initializers that are non-trivial
    // CXXConstructExprs are re-emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit present => firstprivate-style copy from the original.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value through the source task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class case: privatize Elem to the shared value and emit
          // the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private variable: just run its initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3906 
3907 /// Check if duplication function is required for taskloops.
3908 static bool checkInitIsRequired(CodeGenFunction &CGF,
3909                                 ArrayRef<PrivateDataTy> Privates) {
3910   bool InitRequired = false;
3911   for (const PrivateDataTy &Pair : Privates) {
3912     if (Pair.second.isLocalPrivate())
3913       continue;
3914     const VarDecl *VD = Pair.second.PrivateCopy;
3915     const Expr *Init = VD->getAnyInitializer();
3916     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3917                                     !CGF.isTrivialInitializer(Init));
3918     if (InitRequired)
3919       break;
3920   }
3921   return InitRequired;
3922 }
3923 
3924 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the signature: void (kmp_task_t_with_privates *task_dst,
  // kmp_task_t_with_privates *task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the *source* task: load its shareds
  // pointer so emitPrivatesInit can read the original values.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4003 
4004 /// Checks if destructor function is required to be generated.
4005 /// \return true if cleanups are required, false otherwise.
4006 static bool
4007 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4008                          ArrayRef<PrivateDataTy> Privates) {
4009   for (const PrivateDataTy &P : Privates) {
4010     if (P.second.isLocalPrivate())
4011       continue;
4012     QualType Ty = P.second.Original->getType().getNonReferenceType();
4013     if (Ty.isDestructedType())
4014       return true;
4015   }
4016   return false;
4017 }
4018 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator variables and emits
/// the loop header for every iterator in \p E (counter initialization, bounds
/// check, branch into the body); the destructor emits the matching loop
/// latches (counter update, back-branch, exit block) in reverse order.  Code
/// emitted between construction and destruction therefore executes once per
/// point of the (possibly nested) iteration space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Continuation (loop header) destinations, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Exit destinations, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression turns the scope into a no-op.
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Emit all upper bounds up front, before any counters are set up.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Private storage for the iterator variable itself.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // Private storage for the helper counter that drives the loop.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers, outermost iterator first; each subsequent loop
    // nests inside the previous one.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed vs. unsigned comparison to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Closes the loops opened by the constructor, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4097 
/// Computes the base address and the size in bytes of the storage denoted by
/// expression \p E.
///
/// Three forms are handled:
///  - array shaping (OMPArrayShapingExpr): size = sizeof(pointee type)
///    multiplied by all shaping dimensions;
///  - array section (OMPArraySectionExpr): size = (address one past the
///    section's last element) - (section's base address), via ptrtoint;
///  - any other expression: size = sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For a shaping expression, the base already evaluates to a pointer.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Start with the element size, then multiply by every dimension.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Address of the section's last element, then step one element past it.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    // Size is the byte difference between the two addresses.
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4132 
4133 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4134 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4135   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4136   if (KmpTaskAffinityInfoTy.isNull()) {
4137     RecordDecl *KmpAffinityInfoRD =
4138         C.buildImplicitRecord("kmp_task_affinity_info_t");
4139     KmpAffinityInfoRD->startDefinition();
4140     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4141     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4142     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4143     KmpAffinityInfoRD->completeDefinition();
4144     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4145   }
4146 }
4147 
/// Emits the task-creation sequence for the task-generating directive \p D.
///
/// Gathers all private/firstprivate/lastprivate copies and implicit locals,
/// builds the kmp_task_t-with-privates record for this task, emits the proxy
/// task entry and the __kmpc_omp_task_alloc (or, for nowait target tasks,
/// __kmpc_omp_target_task_alloc) call, then initializes the allocated task
/// object: shareds are copied in, privates are initialized, detach/affinity
/// clauses are processed, and the destructor/priority fields are filled in
/// when needed.
/// \return TaskResultTy holding the new task, its entry point, the typed task
/// base lvalue, and (for taskloops) an optional task-duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element used for initialization.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Implicit locals; allocatable ones are stored via pointer.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // different (cached) record than plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null privates-mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime condition (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned kmp_event_t* (void*) to the handler's type.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifiers contribute the product of their upper bounds,
        // which is only known at runtime.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized total: emit a variable-length array of affinity infos.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // All counts are compile-time constants: use a fixed-size temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Runtime position counter continues where the constant part stopped.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Stores below are emitted inside the generated iterator loop(s).
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-duplication function when there are
    // lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4537 
namespace {
/// Dependence kind for RTL.
// These encodings are stored directly into the 'flags' field of
// kmp_depend_info entries (see emitDependData below).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4548 
4549 /// Translates internal dependency kind into the runtime kind.
4550 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4551   RTLDependenceKindTy DepKind;
4552   switch (K) {
4553   case OMPC_DEPEND_in:
4554     DepKind = DepIn;
4555     break;
4556   // Out and InOut dependencies must use the same code.
4557   case OMPC_DEPEND_out:
4558   case OMPC_DEPEND_inout:
4559     DepKind = DepInOut;
4560     break;
4561   case OMPC_DEPEND_mutexinoutset:
4562     DepKind = DepMutexInOutSet;
4563     break;
4564   case OMPC_DEPEND_source:
4565   case OMPC_DEPEND_sink:
4566   case OMPC_DEPEND_depobj:
4567   case OMPC_DEPEND_unknown:
4568     llvm_unreachable("Unknown task dependence type");
4569   }
4570   return DepKind;
4571 }
4572 
4573 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4574 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4575                            QualType &FlagsTy) {
4576   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4577   if (KmpDependInfoTy.isNull()) {
4578     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4579     KmpDependInfoRD->startDefinition();
4580     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4581     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4582     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4583     KmpDependInfoRD->completeDefinition();
4584     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4585   }
4586 }
4587 
/// Returns the number of dependency entries stored in a depobj and an lvalue
/// addressing the first element of its kmp_depend_info array.
///
/// The element at index -1 (immediately before the array) acts as a header:
/// its 'base_addr' field holds the number of dependencies.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependency array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header entry (deps[-1]).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4616 
/// Emits the kmp_depend_info entries for one dependence clause into
/// \p DependenciesArray.
///
/// \param Pos Either a pointer to a compile-time element index that is
///        incremented in place, or an lvalue holding a runtime index (used
///        when the number of previously emitted elements is not a
///        compile-time constant, e.g. with iterator modifiers).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, everything below is emitted
  // inside the generated iterator loop(s).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Constant index: address the element directly.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the current position from memory.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: in place for the constant case, via a
    // load/add/store sequence for the runtime case.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4675 
/// Computes the number of kmp_depend_info records stored in each depobj
/// listed in \p Data. Each depobj pointer is rewound by one record to the
/// header element whose base_addr field holds the element count (see
/// emitDepobjDependClause), and that count is accumulated into a
/// zero-initialized temporary. The totals are only read back after the
/// iterator scope (if any) has been closed, so the returned values are valid
/// outside the generated iterator loops.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  // One accumulator temporary per dependency expression.
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Wrap the per-expression code in the iterator loops when the clause uses
    // an iterator modifier.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record to the header element that precedes the visible
      // depobj array.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temp; inside iterator loops this
      // sums the counts over all iterations of the generated loops.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the final totals after the iterator scope emitted its loop exits.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4733 
/// Copies the kmp_depend_info records referenced by each depobj in \p Data
/// into \p DependenciesArray, starting at the running index stored in
/// \p PosLVal. For each depobj the element count is read from the header
/// record preceding the array, the records are memcpy'ed into the destination,
/// and the position counter is advanced by the number of copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of a single kmp_depend_info record, used to scale the memcpy.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Wrap the copies in the iterator loops when an iterator modifier is used.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps; (advance by element count, not by byte size).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4794 
4795 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4796     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4797     SourceLocation Loc) {
4798   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4799         return D.DepExprs.empty();
4800       }))
4801     return std::make_pair(nullptr, Address::invalid());
4802   // Process list of dependencies.
4803   ASTContext &C = CGM.getContext();
4804   Address DependenciesArray = Address::invalid();
4805   llvm::Value *NumOfElements = nullptr;
4806   unsigned NumDependencies = std::accumulate(
4807       Dependencies.begin(), Dependencies.end(), 0,
4808       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4809         return D.DepKind == OMPC_DEPEND_depobj
4810                    ? V
4811                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4812       });
4813   QualType FlagsTy;
4814   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4815   bool HasDepobjDeps = false;
4816   bool HasRegularWithIterators = false;
4817   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4818   llvm::Value *NumOfRegularWithIterators =
4819       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4820   // Calculate number of depobj dependecies and regular deps with the iterators.
4821   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4822     if (D.DepKind == OMPC_DEPEND_depobj) {
4823       SmallVector<llvm::Value *, 4> Sizes =
4824           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4825       for (llvm::Value *Size : Sizes) {
4826         NumOfDepobjElements =
4827             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4828       }
4829       HasDepobjDeps = true;
4830       continue;
4831     }
4832     // Include number of iterations, if any.
4833     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4834       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4835         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4836         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4837         NumOfRegularWithIterators =
4838             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4839       }
4840       HasRegularWithIterators = true;
4841       continue;
4842     }
4843   }
4844 
4845   QualType KmpDependInfoArrayTy;
4846   if (HasDepobjDeps || HasRegularWithIterators) {
4847     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4848                                            /*isSigned=*/false);
4849     if (HasDepobjDeps) {
4850       NumOfElements =
4851           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4852     }
4853     if (HasRegularWithIterators) {
4854       NumOfElements =
4855           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4856     }
4857     OpaqueValueExpr OVE(Loc,
4858                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4859                         VK_RValue);
4860     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4861                                                   RValue::get(NumOfElements));
4862     KmpDependInfoArrayTy =
4863         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4864                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4865     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4866     // Properly emit variable-sized array.
4867     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4868                                          ImplicitParamDecl::Other);
4869     CGF.EmitVarDecl(*PD);
4870     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4871     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4872                                               /*isSigned=*/false);
4873   } else {
4874     KmpDependInfoArrayTy = C.getConstantArrayType(
4875         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4876         ArrayType::Normal, /*IndexTypeQuals=*/0);
4877     DependenciesArray =
4878         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4879     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4880     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4881                                            /*isSigned=*/false);
4882   }
4883   unsigned Pos = 0;
4884   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4885     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4886         Dependencies[I].IteratorExpr)
4887       continue;
4888     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4889                    DependenciesArray);
4890   }
4891   // Copy regular dependecies with iterators.
4892   LValue PosLVal = CGF.MakeAddrLValue(
4893       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4894   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4895   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4896     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4897         !Dependencies[I].IteratorExpr)
4898       continue;
4899     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4900                    DependenciesArray);
4901   }
4902   // Copy final depobj arrays without iterators.
4903   if (HasDepobjDeps) {
4904     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4905       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4906         continue;
4907       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4908                          DependenciesArray);
4909     }
4910   }
4911   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4912       DependenciesArray, CGF.VoidPtrTy);
4913   return std::make_pair(NumOfElements, DependenciesArray);
4914 }
4915 
/// Allocates and fills the kmp_depend_info array that backs a 'depobj'
/// object. The storage is heap-allocated via __kmpc_alloc with one extra
/// leading element whose base_addr field records the number of dependency
/// records (needed by the depobj update/destroy constructs); the returned
/// address points just past that header element.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime element count: product of the iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element, then scale by the record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time element count: size of a constant array with one extra
    // header element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // The fill position starts at 1, after the header element. With an
  // iterator modifier it must live in memory so the generated loops can
  // update it; otherwise a plain unsigned suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record, past the header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4998 
4999 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5000                                         SourceLocation Loc) {
5001   ASTContext &C = CGM.getContext();
5002   QualType FlagsTy;
5003   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5004   LValue Base = CGF.EmitLoadOfPointerLValue(
5005       DepobjLVal.getAddress(CGF),
5006       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5007   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5008   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5009       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5010   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5011       Addr.getPointer(),
5012       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5013   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5014                                                                CGF.VoidPtrTy);
5015   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5016   // Use default allocator.
5017   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5018   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5019 
5020   // _kmpc_free(gtid, addr, nullptr);
5021   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5022                                 CGM.getModule(), OMPRTL___kmpc_free),
5023                             Args);
5024 }
5025 
/// Implements 'depobj(x) update(kind)': rewrites the flags field of every
/// kmp_depend_info record in the depobj's array to \p NewDepKind by emitting
/// a do-while loop over all NumDeps elements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the element count and the address of the first record.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Past-the-end pointer for the loop exit condition.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body runs at least once; this presumably relies on a
  // depobj never holding zero records — confirm against the constructors.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5071 
/// Emits the runtime calls for a 'task' directive. After allocating and
/// initializing the task via emitTaskInit, the task is either enqueued
/// (__kmpc_omp_task[_with_deps]) or, when an 'if' clause evaluates to false,
/// executed immediately in the undeferred path (__kmpc_omp_task_begin_if0 /
/// __kmpc_omp_task_complete_if0 around a direct call to the task entry).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Deferred path: enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Undeferred path ('if' clause is false): wait on dependences, then run the
  // task body inline between begin_if0/complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the deferred path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5189 
/// Emits the runtime call for a 'taskloop' directive: initializes the task
/// object, stores the loop bounds/stride and the reductions pointer into the
/// kmp_task_t record, and invokes __kmpc_taskloop with the scheduling
/// information from the grainsize/num_tasks clauses.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause becomes the runtime if_val argument (1 when absent).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the lower bound into the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the upper bound into the task record.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the stride into the task record.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kinds understood by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule is a {expr, is_num_tasks} pair: pointer selects whether
      // any schedule clause was given, the int selects num_tasks vs grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5275 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded to \p RedOpGen
/// (used by atomic-style reduction generators; may be null).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop: skip the body entirely when
  // the array is empty.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers across loop
  // iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so the
  // generator operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5355 
5356 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5357 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5358 /// UDR combiner function.
5359 static void emitReductionCombiner(CodeGenFunction &CGF,
5360                                   const Expr *ReductionOp) {
5361   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5362     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5363       if (const auto *DRE =
5364               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5365         if (const auto *DRD =
5366                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5367           std::pair<llvm::Function *, llvm::Function *> Reduction =
5368               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5369           RValue Func = RValue::get(Reduction.first);
5370           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5371           CGF.EmitIgnoredExpr(ReductionOp);
5372           return;
5373         }
5374   CGF.EmitIgnoredExpr(ReductionOp);
5375 }
5376 
/// Emits the reduce_func passed to __kmpc_reduce{_nowait}: it receives two
/// void* arrays of pointers to the reduction items and applies each reduction
/// operation to the corresponding lhs/rhs element pair, storing into lhs.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each lhs/rhs reduction variable to the address stored in the
  // corresponding slot of the incoming void* arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // A VLA item occupies an extra slot holding its size as a void*-encoded
      // integer; decode it and bind it to the VLA size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5468 
5469 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5470                                                   const Expr *ReductionOp,
5471                                                   const Expr *PrivateRef,
5472                                                   const DeclRefExpr *LHS,
5473                                                   const DeclRefExpr *RHS) {
5474   if (PrivateRef->getType()->isArrayType()) {
5475     // Emit reduction for array section.
5476     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5477     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5478     EmitOMPAggregateReduction(
5479         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5480         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5481           emitReductionCombiner(CGF, ReductionOp);
5482         });
5483   } else {
5484     // Emit reduction for array subscript or single variable.
5485     emitReductionCombiner(CGF, ReductionOp);
5486   }
5487 }
5488 
/// Emits the reduction finalization: packs pointers to the reduction items
/// into a void* list, calls __kmpc_reduce{_nowait}, and emits both the tree
/// (case 1) and the atomic (case 2) reduction paths, or just the plain
/// combiners when Options.SimpleReduction is set.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls needed - combine each private directly into its
    // original (LHS) variable.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA sizes travel in an extra slot, encoded as a void* so the list
      // stays homogeneous; reduce_func decodes them with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'x = x op e' (if the op has that shape) into XExpr (the
      // shared variable), EExpr (the combined value) and UpExpr (the full
      // update RHS) so it can be emitted as a simple atomic update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // When a compare-exchange loop is required, re-evaluate the
                // update expression against a temporary holding the value
                // just read from X.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5795 
5796 /// Generates unique name for artificial threadprivate variables.
5797 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5798 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5799                                       const Expr *Ref) {
5800   SmallString<256> Buffer;
5801   llvm::raw_svector_ostream Out(Buffer);
5802   const clang::DeclRefExpr *DE;
5803   const VarDecl *D = ::getBaseDecl(Ref, DE);
5804   if (!D)
5805     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5806   D = D->getCanonicalDecl();
5807   std::string Name = CGM.getOpenMPRuntime().getName(
5808       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5809   Out << Prefix << Name << "_"
5810       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5811   return std::string(Out.str());
5812 }
5813 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Initializer ignores the original item; pass a null pointer.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5882 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5960 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Skip emitting a finalizer entirely for trivially-destructible items.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6009 
/// Emits initialization for task-based reductions.
///
/// Builds an on-stack array of kmp_taskred_input_t descriptors (one per
/// reduction item in \p Data) and passes it to the runtime: to
/// __kmpc_taskred_modifier_init when Data.IsReductionWithTaskMod is set,
/// otherwise to __kmpc_taskred_init.  Returns the value produced by that
/// runtime call, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size-in-chars;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // The finalizer is omitted (null) when the item requires no cleanups.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests delayed creation by the runtime, used for
      // VLAs/array sections (see comment above about delayed creation).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6138 
/// Emits the finalization call for a task reduction with the 'task' modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws is 1 for a worksharing reduction, 0 for a parallel one.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6156 
/// Stores the runtime-computed size of the N-th reduction item into the
/// artificial threadprivate variable that the init/fini/comb helper
/// functions read it from.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable only if the size is non-constant,
  // i.e. Sizes.second is a non-null runtime value. Constant-sized items need
  // no fixup.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6173 
/// Returns the address of the current thread's private copy of the reduction
/// item whose shared lvalue is \p SharedLVal, by calling the runtime.
/// \param ReductionsPtr Taskgroup data returned by the reduction init call.
/// Note: the result reuses SharedLVal's alignment for the returned pointer.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}
6193 
6194 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6195                                        SourceLocation Loc) {
6196   if (!CGF.HaveInsertPoint())
6197     return;
6198 
6199   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6200     OMPBuilder.createTaskwait(CGF.Builder);
6201   } else {
6202     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6203     // global_tid);
6204     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6205     // Ignore return result until untied tasks are supported.
6206     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6207                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6208                         Args);
6209   }
6210 
6211   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6212     Region->emitUntiedSwitch(CGF);
6213 }
6214 
/// Emits the body of an OpenMP directive inlined into the current function
/// (no outlining), using \p CodeGen to generate the region body.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // RAII object installs inlined-region info for the duration of EmitBody
  // (presumably restoring the previous CapturedStmtInfo on destruction).
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6224 
namespace {
/// Cancellation kind codes passed as the 'cncl_kind' argument to the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls
/// (see getCancellationKind).
enum RTCancelKind {
  CancelNoreq = 0,      // no cancellation requested
  CancelParallel = 1,   // cancel a 'parallel' region
  CancelLoop = 2,       // cancel a worksharing loop ('for')
  CancelSections = 3,   // cancel a 'sections' region
  CancelTaskgroup = 4   // cancel a 'taskgroup' region
};
} // anonymous namespace
6234 
6235 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6236   RTCancelKind CancelKind = CancelNoreq;
6237   if (CancelRegion == OMPD_parallel)
6238     CancelKind = CancelParallel;
6239   else if (CancelRegion == OMPD_for)
6240     CancelKind = CancelLoop;
6241   else if (CancelRegion == OMPD_sections)
6242     CancelKind = CancelSections;
6243   else {
6244     assert(CancelRegion == OMPD_taskgroup);
6245     CancelKind = CancelTaskgroup;
6246   }
6247   return CancelKind;
6248 }
6249 
/// Emits code for the 'cancellation point' directive: query the runtime and,
/// if cancellation is active, branch out of the enclosing construct through
/// any pending cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // Emit the conditional exit:
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; EmitBranchThroughCleanup runs any cleanups
      // between here and the cancel destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6286 
/// Emits code for the 'cancel' directive. The runtime call is guarded by the
/// 'if' clause condition when \p IfCond is non-null; on a positive runtime
/// result, control branches out of the enclosing construct through cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The 'then' branch of the (optional) 'if' clause: perform the actual
    // cancel. Note the callback takes its own CGF, which may differ from the
    // outer one when used by emitIfClause.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // Emit the conditional exit:
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; EmitBranchThroughCleanup runs any cleanups
      // between here and the cancel destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guarded form: 'else' branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      // Unconditional cancel.
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6329 
6330 namespace {
6331 /// Cleanup action for uses_allocators support.
6332 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6333   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6334 
6335 public:
6336   OMPUsesAllocatorsActionTy(
6337       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6338       : Allocators(Allocators) {}
6339   void Enter(CodeGenFunction &CGF) override {
6340     if (!CGF.HaveInsertPoint())
6341       return;
6342     for (const auto &AllocatorData : Allocators) {
6343       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6344           CGF, AllocatorData.first, AllocatorData.second);
6345     }
6346   }
6347   void Exit(CodeGenFunction &CGF) override {
6348     if (!CGF.HaveInsertPoint())
6349       return;
6350     for (const auto &AllocatorData : Allocators) {
6351       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6352                                                         AllocatorData.first);
6353     }
6354   }
6355 };
6356 } // namespace
6357 
6358 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6359     const OMPExecutableDirective &D, StringRef ParentName,
6360     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6361     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6362   assert(!ParentName.empty() && "Invalid target region parent name!");
6363   HasEmittedTargetRegion = true;
6364   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6365   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6366     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6367       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6368       if (!D.AllocatorTraits)
6369         continue;
6370       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6371     }
6372   }
6373   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6374   CodeGen.setAction(UsesAllocatorAction);
6375   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6376                                    IsOffloadEntry, CodeGen);
6377 }
6378 
/// Emits initialization of one user-defined allocator from a
/// 'uses_allocators' clause: calls __kmpc_init_allocator with the traits
/// array and stores the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = element count of the constant traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Rebind the traits array address as a void** lvalue so a plain scalar
  // load yields the pointer argument expected by the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // First emit the declaration of the allocator variable itself, then store
  // the handle (converted from void* to the allocator's declared type).
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6413 
6414 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6415                                              const Expr *Allocator) {
6416   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6417   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6418   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6419   llvm::Value *AllocatorVal =
6420       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6421   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6422                                           CGF.getContext().VoidPtrTy,
6423                                           Allocator->getExprLoc());
6424   (void)CGF.EmitRuntimeCall(
6425       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6426                                             OMPRTL___kmpc_destroy_allocator),
6427       {ThreadId, AllocatorVal});
6428 }
6429 
/// Generates the outlined function for a target region, computes its region
/// ID, and registers it as an offload entry when requested.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement of the target region into EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device: the ID is the outlined function itself (as i8*).
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host: the ID is a dedicated constant i8 global named <entry>.region_id.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6498 
6499 /// Checks if the expression is constant or does not have non-trivial function
6500 /// calls.
6501 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6502   // We can skip constant expressions.
6503   // We can skip expressions with trivial calls or simple expressions.
6504   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6505           !E->hasNonTrivialCall(Ctx)) &&
6506          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6507 }
6508 
/// Descends through compound statements in \p Body, skipping statements that
/// cannot affect which construct is "the" child (trivial expressions, asm,
/// null statements, some OpenMP directives, and trivial declarations).
/// Returns the single remaining child statement, or nullptr when there are
/// several non-ignorable children at some level.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial expression statements (constant, side-effect free) do not
        // count as children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable only if every declaration in
        // it is: one of the listed non-variable kinds, or a variable that is
        // constexpr / trivially-typed (or reference) with a trivial or
        // absent initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep descending through containers of the single child found.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6553 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a closely nested teams directive and use its
    // num_teams clause, if any.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams directive without num_teams: 0 (runtime default).
        return Bld.getInt32(0);
      }
      // Nested parallel/simd but no teams: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive found; caller decides.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause is on this directive.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct possible: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and are rejected by
  // the assertion above; listed explicitly to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6687 
/// Compute the number of threads for the region captured by \p CS when it is
/// launched from a target construct.
///
/// Inspects the single (compound) child of the captured statement:
/// - For a nested parallel directive, combines its optional 'if' and
///   'num_threads' clauses (clamped by \p DefaultThreadLimitVal) into the
///   runtime value <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
/// - For a nested simd directive, a single thread is used.
/// - Otherwise falls back to \p DefaultThreadLimitVal (or an explicit 0 when
///   no limit was provided, leaving the decision to the runtime).
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel': either an
        // unmodified clause or one with the 'parallel' name modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          // If the condition constant-folds to false, the parallel region is
          // executed with one thread.
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations captured by the clause before
            // evaluating the condition itself.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // OMPCaptureNoInit: allocate storage and register cleanups,
                  // but skip the initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations, mirroring the if-clause
        // handling above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit: allocate storage and register cleanups,
              // but skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the enclosing limit if one exists:
        // min(DefaultThreadLimitVal, NumThreads) via an unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the enclosing limit, or 0 when no
        // explicit thread count is known.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region is executed by a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6779 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the thread count is determined by whatever construct is
    // nested inside the captured region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive with thread_limit: evaluate the clause (including
      // its pre-init declarations) to obtain the limit.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit: allocate storage and register cleanups,
              // but skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step through a teams construct (without distribute) to inspect the
      // directive nested inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute region: look inside its body for a parallel
      // region that fixes the thread count.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region is executed by a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested standalone distribute: look inside its body too.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the 'if' clause that applies to 'parallel': either an
      // unmodified clause or one with the 'parallel' name modifier.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        // A condition that folds to false forces a single thread.
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(NumThreads, ThreadLimit) with an unsigned compare, or
      // just NumThreads if there is no limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd target regions are executed by a single thread.
    return Bld.getInt32(1);
  // All remaining directives are not target-based execution directives;
  // reaching any of them here is a bug (see llvm_unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7005 
7006 namespace {
7007 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7008 
7009 // Utility to handle information from clauses associated with a given
7010 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7011 // It provides a convenient interface to obtain the information and generate
7012 // code for that information.
7013 class MappableExprsHandler {
7014 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these bit values appear to mirror the map-type flags
  /// consumed by the offloading runtime — verify against libomptarget before
  /// changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7062 
7063   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7064   static unsigned getFlagMemberOffset() {
7065     unsigned Offset = 0;
7066     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7067          Remain = Remain >> 1)
7068       Offset++;
7069     return Offset;
7070   }
7071 
7072   /// Class that holds debugging information for a data mapping to be passed to
7073   /// the runtime library.
7074   class MappingExprInfo {
7075     /// The variable declaration used for the data mapping.
7076     const ValueDecl *MapDecl = nullptr;
7077     /// The original expression used in the map clause, or null if there is
7078     /// none.
7079     const Expr *MapExpr = nullptr;
7080 
7081   public:
7082     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7083         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7084 
7085     const ValueDecl *getMapDecl() const { return MapDecl; }
7086     const Expr *getMapExpr() const { return MapExpr; }
7087   };
7088 
7089   /// Class that associates information with a base pointer to be passed to the
7090   /// runtime library.
7091   class BasePointerInfo {
7092     /// The base pointer.
7093     llvm::Value *Ptr = nullptr;
7094     /// The base declaration that refers to this device pointer, or null if
7095     /// there is none.
7096     const ValueDecl *DevPtrDecl = nullptr;
7097 
7098   public:
7099     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7100         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7101     llvm::Value *operator*() const { return Ptr; }
7102     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7103     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7104   };
7105 
  // Convenience aliases for the per-entry arrays handed to the offloading
  // runtime; the parallel arrays below hold one element per map entry.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7113 
7114   /// This structure contains combined information generated for mappable
7115   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7116   /// mappers, and non-contiguous information.
7117   struct MapCombinedInfoTy {
7118     struct StructNonContiguousInfo {
7119       bool IsNonContiguous = false;
7120       MapDimArrayTy Dims;
7121       MapNonContiguousArrayTy Offsets;
7122       MapNonContiguousArrayTy Counts;
7123       MapNonContiguousArrayTy Strides;
7124     };
7125     MapExprsArrayTy Exprs;
7126     MapBaseValuesArrayTy BasePointers;
7127     MapValuesArrayTy Pointers;
7128     MapValuesArrayTy Sizes;
7129     MapFlagsArrayTy Types;
7130     MapMappersArrayTy Mappers;
7131     StructNonContiguousInfo NonContigInfo;
7132 
7133     /// Append arrays in \a CurInfo.
7134     void append(MapCombinedInfoTy &CurInfo) {
7135       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7136       BasePointers.append(CurInfo.BasePointers.begin(),
7137                           CurInfo.BasePointers.end());
7138       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7139       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7140       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7141       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7142       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7143                                  CurInfo.NonContigInfo.Dims.end());
7144       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7145                                     CurInfo.NonContigInfo.Offsets.end());
7146       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7147                                    CurInfo.NonContigInfo.Counts.end());
7148       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7149                                     CurInfo.NonContigInfo.Strides.end());
7150     }
7151   };
7152 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map entries collected before the struct itself is processed.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: field index paired with its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: field index paired with its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower bound address of the mapped range.
    Address LB = Address::invalid();
    /// True if the mapped range came from an array section.
    bool IsArraySection = false;
    /// True if the whole record is mapped.
    bool HasCompleteRecord = false;
  };
7168 
7169 private:
  /// Information extracted from a single map-like clause for one component
  /// list: the components themselves, the map type and its modifiers, and
  /// bookkeeping flags used while generating the map entries.
  /// (The previous comment here described device-pointer handling and did not
  /// match this struct's contents.)
  struct MapInfo {
    /// The component list (base expression plus member/subscript chain).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Kind of the map clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Modifiers from a 'map' clause (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Modifiers from 'to'/'from' motion clauses (present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the map was generated implicitly by the compiler.
    bool IsImplicit = false;
    /// User-defined mapper to apply, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original expression from the clause, used for diagnostics/debugging.
    const Expr *VarRef = nullptr;
    /// True when the entry comes from use_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7196 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression for the deferred entry (presumably the innermost component
    // expression of the clause — TODO confirm against the emission site).
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    // True for use_device_addr entries, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7209 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7230 
  /// Compute the number of bytes covered by mapping expression \p E.
  ///
  /// Three shapes of expression are handled:
  /// - OMPArrayShapingExpr: product of all dimension extents times the
  ///   pointee size.
  /// - OMPArraySectionExpr: section length times element size, with special
  ///   cases for whole-array, single-element, and lower-bound-only sections.
  /// - Anything else: the size of the expression's (non-reference) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      // Multiply the pointee size by every dimension extent, converting each
      // extent to size_t first.
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size depends on whether the base is a pointer or an array.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * elemsize (length converted to
      // size_t).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero if the lower bound is at or past the end of the base.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7305 
7306   /// Return the corresponding bits for a given map clause modifier. Add
7307   /// a flag marking the map as a pointer if requested. Add a flag marking the
7308   /// map as the first one of a series of maps that relate to the same map
7309   /// expression.
7310   OpenMPOffloadMappingFlags getMapTypeBits(
7311       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7312       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7313       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7314     OpenMPOffloadMappingFlags Bits =
7315         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7316     switch (MapType) {
7317     case OMPC_MAP_alloc:
7318     case OMPC_MAP_release:
7319       // alloc and release is the default behavior in the runtime library,  i.e.
7320       // if we don't pass any bits alloc/release that is what the runtime is
7321       // going to do. Therefore, we don't need to signal anything for these two
7322       // type modifiers.
7323       break;
7324     case OMPC_MAP_to:
7325       Bits |= OMP_MAP_TO;
7326       break;
7327     case OMPC_MAP_from:
7328       Bits |= OMP_MAP_FROM;
7329       break;
7330     case OMPC_MAP_tofrom:
7331       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7332       break;
7333     case OMPC_MAP_delete:
7334       Bits |= OMP_MAP_DELETE;
7335       break;
7336     case OMPC_MAP_unknown:
7337       llvm_unreachable("Unexpected map type!");
7338     }
7339     if (AddPtrFlag)
7340       Bits |= OMP_MAP_PTR_AND_OBJ;
7341     if (AddIsTargetParamFlag)
7342       Bits |= OMP_MAP_TARGET_PARAM;
7343     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7344         != MapModifiers.end())
7345       Bits |= OMP_MAP_ALWAYS;
7346     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7347         != MapModifiers.end())
7348       Bits |= OMP_MAP_CLOSE;
7349     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7350             MapModifiers.end() ||
7351         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7352             MotionModifiers.end())
7353       Bits |= OMP_MAP_PRESENT;
7354     if (IsNonContiguous)
7355       Bits |= OMP_MAP_NON_CONTIG;
7356     return Bits;
7357   }
7358 
7359   /// Return true if the provided expression is a final array section. A
7360   /// final array section, is one whose length can't be proved to be one.
7361   bool isFinalArraySectionExpression(const Expr *E) const {
7362     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7363 
7364     // It is not an array section and therefore not a unity-size one.
7365     if (!OASE)
7366       return false;
7367 
7368     // An array section with no colon always refer to a single element.
7369     if (OASE->getColonLocFirst().isInvalid())
7370       return false;
7371 
7372     const Expr *Length = OASE->getLength();
7373 
7374     // If we don't have a length we have to check if the array has size 1
7375     // for this dimension. Also, we should always expect a length if the
7376     // base type is pointer.
7377     if (!Length) {
7378       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7379                              OASE->getBase()->IgnoreParenImpCasts())
7380                              .getCanonicalType();
7381       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7382         return ATy->getSize().getSExtValue() != 1;
7383       // If we don't have a constant dimension length, we have to consider
7384       // the current section as having any size, so it is not necessarily
7385       // unitary. If it happen to be unity size, that's user fault.
7386       return true;
7387     }
7388 
7389     // Check if the length evaluates to 1.
7390     Expr::EvalResult Result;
7391     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7392       return true; // Can have more that size 1.
7393 
7394     llvm::APSInt ConstLength = Result.Val.getInt();
7395     return ConstLength.getSExtValue() != 1;
7396   }
7397 
7398   /// Generate the base pointers, section pointers, sizes, map type bits, and
7399   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7400   /// map type, map or motion modifiers, and expression components.
7401   /// \a IsFirstComponent should be set to true if the provided set of
7402   /// components is the first associated with a capture.
7403   void generateInfoForComponentList(
7404       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7405       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7406       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7407       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7408       bool IsFirstComponentList, bool IsImplicit,
7409       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7410       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7411       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7412           OverlappedElements = llvm::None) const {
7413     // The following summarizes what has to be generated for each map and the
7414     // types below. The generated information is expressed in this order:
7415     // base pointer, section pointer, size, flags
7416     // (to add to the ones that come from the map type and modifier).
7417     //
7418     // double d;
7419     // int i[100];
7420     // float *p;
7421     //
7422     // struct S1 {
7423     //   int i;
7424     //   float f[50];
7425     // }
7426     // struct S2 {
7427     //   int i;
7428     //   float f[50];
7429     //   S1 s;
7430     //   double *p;
7431     //   struct S2 *ps;
7432     // }
7433     // S2 s;
7434     // S2 *ps;
7435     //
7436     // map(d)
7437     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7438     //
7439     // map(i)
7440     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7441     //
7442     // map(i[1:23])
7443     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7444     //
7445     // map(p)
7446     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7447     //
7448     // map(p[1:24])
7449     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7450     // in unified shared memory mode or for local pointers
7451     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7452     //
7453     // map(s)
7454     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7455     //
7456     // map(s.i)
7457     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7458     //
7459     // map(s.s.f)
7460     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7461     //
7462     // map(s.p)
7463     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7464     //
7465     // map(to: s.p[:22])
7466     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7467     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7468     // &(s.p), &(s.p[0]), 22*sizeof(double),
7469     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7470     // (*) alloc space for struct members, only this is a target parameter
7471     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7472     //      optimizes this entry out, same in the examples below)
7473     // (***) map the pointee (map: to)
7474     //
7475     // map(s.ps)
7476     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7477     //
7478     // map(from: s.ps->s.i)
7479     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7480     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7481     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7482     //
7483     // map(to: s.ps->ps)
7484     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7485     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7486     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7487     //
7488     // map(s.ps->ps->ps)
7489     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7490     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7491     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7492     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7493     //
7494     // map(to: s.ps->ps->s.f[:22])
7495     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7496     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7497     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7498     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7499     //
7500     // map(ps)
7501     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7502     //
7503     // map(ps->i)
7504     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7505     //
7506     // map(ps->s.f)
7507     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7508     //
7509     // map(from: ps->p)
7510     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7511     //
7512     // map(to: ps->p[:22])
7513     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7514     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7515     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7516     //
7517     // map(ps->ps)
7518     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7519     //
7520     // map(from: ps->ps->s.i)
7521     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7522     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7523     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7524     //
7525     // map(from: ps->ps->ps)
7526     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7527     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7528     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7529     //
7530     // map(ps->ps->ps->ps)
7531     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7532     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7533     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7534     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7535     //
7536     // map(to: ps->ps->ps->s.f[:22])
7537     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7538     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7539     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7540     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7541     //
7542     // map(to: s.f[:22]) map(from: s.p[:33])
7543     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7544     //     sizeof(double*) (**), TARGET_PARAM
7545     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7546     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7547     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7548     // (*) allocate contiguous space needed to fit all mapped members even if
7549     //     we allocate space for members not mapped (in this example,
7550     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7551     //     them as well because they fall between &s.f[0] and &s.p)
7552     //
7553     // map(from: s.f[:22]) map(to: ps->p[:33])
7554     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7555     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7556     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7557     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7558     // (*) the struct this entry pertains to is the 2nd element in the list of
7559     //     arguments, hence MEMBER_OF(2)
7560     //
7561     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7562     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7563     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7564     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7565     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7566     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7567     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7568     // (*) the struct this entry pertains to is the 4th element in the list
7569     //     of arguments, hence MEMBER_OF(4)
7570 
7571     // Track if the map information being generated is the first for a capture.
7572     bool IsCaptureFirstInfo = IsFirstComponentList;
7573     // When the variable is on a declare target link or in a to clause with
7574     // unified memory, a reference is needed to hold the host/device address
7575     // of the variable.
7576     bool RequiresReference = false;
7577 
7578     // Scan the components from the base to the complete expression.
7579     auto CI = Components.rbegin();
7580     auto CE = Components.rend();
7581     auto I = CI;
7582 
7583     // Track if the map information being generated is the first for a list of
7584     // components.
7585     bool IsExpressionFirstInfo = true;
7586     bool FirstPointerInComplexData = false;
7587     Address BP = Address::invalid();
7588     const Expr *AssocExpr = I->getAssociatedExpression();
7589     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7590     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7591     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7592 
7593     if (isa<MemberExpr>(AssocExpr)) {
7594       // The base is the 'this' pointer. The content of the pointer is going
7595       // to be the base of the field being mapped.
7596       BP = CGF.LoadCXXThisAddress();
7597     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7598                (OASE &&
7599                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7600       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7601     } else if (OAShE &&
7602                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7603       BP = Address(
7604           CGF.EmitScalarExpr(OAShE->getBase()),
7605           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7606     } else {
7607       // The base is the reference to the variable.
7608       // BP = &Var.
7609       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7610       if (const auto *VD =
7611               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7612         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7613                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7614           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7615               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7616                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7617             RequiresReference = true;
7618             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7619           }
7620         }
7621       }
7622 
7623       // If the variable is a pointer and is being dereferenced (i.e. is not
7624       // the last component), the base has to be the pointer itself, not its
7625       // reference. References are ignored for mapping purposes.
7626       QualType Ty =
7627           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7628       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7629         // No need to generate individual map information for the pointer, it
7630         // can be associated with the combined storage if shared memory mode is
7631         // active or the base declaration is not global variable.
7632         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7633         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7634             !VD || VD->hasLocalStorage())
7635           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7636         else
7637           FirstPointerInComplexData = true;
7638         ++I;
7639       }
7640     }
7641 
7642     // Track whether a component of the list should be marked as MEMBER_OF some
7643     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7644     // in a component list should be marked as MEMBER_OF, all subsequent entries
7645     // do not belong to the base struct. E.g.
7646     // struct S2 s;
7647     // s.ps->ps->ps->f[:]
7648     //   (1) (2) (3) (4)
7649     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7650     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7651     // is the pointee of ps(2) which is not member of struct s, so it should not
7652     // be marked as such (it is still PTR_AND_OBJ).
7653     // The variable is initialized to false so that PTR_AND_OBJ entries which
7654     // are not struct members are not considered (e.g. array of pointers to
7655     // data).
7656     bool ShouldBeMemberOf = false;
7657 
7658     // Variable keeping track of whether or not we have encountered a component
7659     // in the component list which is a member expression. Useful when we have a
7660     // pointer or a final array section, in which case it is the previous
7661     // component in the list which tells us whether we have a member expression.
7662     // E.g. X.f[:]
7663     // While processing the final array section "[:]" it is "f" which tells us
7664     // whether we are dealing with a member of a declared struct.
7665     const MemberExpr *EncounteredME = nullptr;
7666 
7667     // Track for the total number of dimension. Start from one for the dummy
7668     // dimension.
7669     uint64_t DimSize = 1;
7670 
7671     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7672 
7673     for (; I != CE; ++I) {
7674       // If the current component is member of a struct (parent struct) mark it.
7675       if (!EncounteredME) {
7676         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7677         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7678         // as MEMBER_OF the parent struct.
7679         if (EncounteredME) {
7680           ShouldBeMemberOf = true;
7681           // Do not emit as complex pointer if this is actually not array-like
7682           // expression.
7683           if (FirstPointerInComplexData) {
7684             QualType Ty = std::prev(I)
7685                               ->getAssociatedDeclaration()
7686                               ->getType()
7687                               .getNonReferenceType();
7688             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7689             FirstPointerInComplexData = false;
7690           }
7691         }
7692       }
7693 
7694       auto Next = std::next(I);
7695 
7696       // We need to generate the addresses and sizes if this is the last
7697       // component, if the component is a pointer or if it is an array section
7698       // whose length can't be proved to be one. If this is a pointer, it
7699       // becomes the base address for the following components.
7700 
7701       // A final array section, is one whose length can't be proved to be one.
7702       // If the map item is non-contiguous then we don't treat any array section
7703       // as final array section.
7704       bool IsFinalArraySection =
7705           !IsNonContiguous &&
7706           isFinalArraySectionExpression(I->getAssociatedExpression());
7707 
7708       // If we have a declaration for the mapping use that, otherwise use
7709       // the base declaration of the map clause.
7710       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7711                                      ? I->getAssociatedDeclaration()
7712                                      : BaseDecl;
7713 
7714       // Get information on whether the element is a pointer. Have to do a
7715       // special treatment for array sections given that they are built-in
7716       // types.
7717       const auto *OASE =
7718           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7719       const auto *OAShE =
7720           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7721       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7722       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7723       bool IsPointer =
7724           OAShE ||
7725           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7726                        .getCanonicalType()
7727                        ->isAnyPointerType()) ||
7728           I->getAssociatedExpression()->getType()->isAnyPointerType();
7729       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7730 
7731       if (OASE)
7732         ++DimSize;
7733 
7734       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7735         // If this is not the last component, we expect the pointer to be
7736         // associated with an array expression or member expression.
7737         assert((Next == CE ||
7738                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7739                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7740                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7741                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7742                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7743                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7744                "Unexpected expression");
7745 
7746         Address LB = Address::invalid();
7747         if (OAShE) {
7748           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7749                        CGF.getContext().getTypeAlignInChars(
7750                            OAShE->getBase()->getType()));
7751         } else {
7752           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7753                    .getAddress(CGF);
7754         }
7755 
7756         // If this component is a pointer inside the base struct then we don't
7757         // need to create any entry for it - it will be combined with the object
7758         // it is pointing to into a single PTR_AND_OBJ entry.
7759         bool IsMemberPointerOrAddr =
7760             (IsPointer || ForDeviceAddr) && EncounteredME &&
7761             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7762              EncounteredME);
7763         if (!OverlappedElements.empty() && Next == CE) {
7764           // Handle base element with the info for overlapped elements.
7765           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7766           assert(!IsPointer &&
7767                  "Unexpected base element with the pointer type.");
7768           // Mark the whole struct as the struct that requires allocation on the
7769           // device.
7770           PartialStruct.LowestElem = {0, LB};
7771           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7772               I->getAssociatedExpression()->getType());
7773           Address HB = CGF.Builder.CreateConstGEP(
7774               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7775                                                               CGF.VoidPtrTy),
7776               TypeSize.getQuantity() - 1);
7777           PartialStruct.HighestElem = {
7778               std::numeric_limits<decltype(
7779                   PartialStruct.HighestElem.first)>::max(),
7780               HB};
7781           PartialStruct.Base = BP;
7782           PartialStruct.LB = LB;
7783           assert(
7784               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7785               "Overlapped elements must be used only once for the variable.");
7786           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7787           // Emit data for non-overlapped data.
7788           OpenMPOffloadMappingFlags Flags =
7789               OMP_MAP_MEMBER_OF |
7790               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7791                              /*AddPtrFlag=*/false,
7792                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7793           llvm::Value *Size = nullptr;
7794           // Do bitcopy of all non-overlapped structure elements.
7795           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7796                    Component : OverlappedElements) {
7797             Address ComponentLB = Address::invalid();
7798             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7799                  Component) {
7800               if (MC.getAssociatedDeclaration()) {
7801                 ComponentLB =
7802                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7803                         .getAddress(CGF);
7804                 Size = CGF.Builder.CreatePtrDiff(
7805                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7806                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7807                 break;
7808               }
7809             }
7810             assert(Size && "Failed to determine structure size");
7811             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7812             CombinedInfo.BasePointers.push_back(BP.getPointer());
7813             CombinedInfo.Pointers.push_back(LB.getPointer());
7814             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7815                 Size, CGF.Int64Ty, /*isSigned=*/true));
7816             CombinedInfo.Types.push_back(Flags);
7817             CombinedInfo.Mappers.push_back(nullptr);
7818             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7819                                                                       : 1);
7820             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7821           }
7822           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7823           CombinedInfo.BasePointers.push_back(BP.getPointer());
7824           CombinedInfo.Pointers.push_back(LB.getPointer());
7825           Size = CGF.Builder.CreatePtrDiff(
7826               CGF.EmitCastToVoidPtr(
7827                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7828               CGF.EmitCastToVoidPtr(LB.getPointer()));
7829           CombinedInfo.Sizes.push_back(
7830               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7831           CombinedInfo.Types.push_back(Flags);
7832           CombinedInfo.Mappers.push_back(nullptr);
7833           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7834                                                                     : 1);
7835           break;
7836         }
7837         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7838         if (!IsMemberPointerOrAddr ||
7839             (Next == CE && MapType != OMPC_MAP_unknown)) {
7840           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7841           CombinedInfo.BasePointers.push_back(BP.getPointer());
7842           CombinedInfo.Pointers.push_back(LB.getPointer());
7843           CombinedInfo.Sizes.push_back(
7844               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7845           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7846                                                                     : 1);
7847 
7848           // If Mapper is valid, the last component inherits the mapper.
7849           bool HasMapper = Mapper && Next == CE;
7850           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7851 
7852           // We need to add a pointer flag for each map that comes from the
7853           // same expression except for the first one. We also need to signal
7854           // this map is the first one that relates with the current capture
7855           // (there is a set of entries for each capture).
7856           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7857               MapType, MapModifiers, MotionModifiers, IsImplicit,
7858               !IsExpressionFirstInfo || RequiresReference ||
7859                   FirstPointerInComplexData,
7860               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7861 
7862           if (!IsExpressionFirstInfo) {
7863             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7864             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7865             if (IsPointer)
7866               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7867                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7868 
7869             if (ShouldBeMemberOf) {
7870               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7871               // should be later updated with the correct value of MEMBER_OF.
7872               Flags |= OMP_MAP_MEMBER_OF;
7873               // From now on, all subsequent PTR_AND_OBJ entries should not be
7874               // marked as MEMBER_OF.
7875               ShouldBeMemberOf = false;
7876             }
7877           }
7878 
7879           CombinedInfo.Types.push_back(Flags);
7880         }
7881 
7882         // If we have encountered a member expression so far, keep track of the
7883         // mapped member. If the parent is "*this", then the value declaration
7884         // is nullptr.
7885         if (EncounteredME) {
7886           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7887           unsigned FieldIndex = FD->getFieldIndex();
7888 
7889           // Update info about the lowest and highest elements for this struct
7890           if (!PartialStruct.Base.isValid()) {
7891             PartialStruct.LowestElem = {FieldIndex, LB};
7892             if (IsFinalArraySection) {
7893               Address HB =
7894                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7895                       .getAddress(CGF);
7896               PartialStruct.HighestElem = {FieldIndex, HB};
7897             } else {
7898               PartialStruct.HighestElem = {FieldIndex, LB};
7899             }
7900             PartialStruct.Base = BP;
7901             PartialStruct.LB = BP;
7902           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7903             PartialStruct.LowestElem = {FieldIndex, LB};
7904           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7905             PartialStruct.HighestElem = {FieldIndex, LB};
7906           }
7907         }
7908 
7909         // Need to emit combined struct for array sections.
7910         if (IsFinalArraySection || IsNonContiguous)
7911           PartialStruct.IsArraySection = true;
7912 
7913         // If we have a final array section, we are done with this expression.
7914         if (IsFinalArraySection)
7915           break;
7916 
7917         // The pointer becomes the base for the next element.
7918         if (Next != CE)
7919           BP = LB;
7920 
7921         IsExpressionFirstInfo = false;
7922         IsCaptureFirstInfo = false;
7923         FirstPointerInComplexData = false;
7924       } else if (FirstPointerInComplexData) {
7925         QualType Ty = Components.rbegin()
7926                           ->getAssociatedDeclaration()
7927                           ->getType()
7928                           .getNonReferenceType();
7929         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7930         FirstPointerInComplexData = false;
7931       }
7932     }
7933     // If ran into the whole component - allocate the space for the whole
7934     // record.
7935     if (!EncounteredME)
7936       PartialStruct.HasCompleteRecord = true;
7937 
7938     if (!IsNonContiguous)
7939       return;
7940 
7941     const ASTContext &Context = CGF.getContext();
7942 
7943     // For supporting stride in array section, we need to initialize the first
7944     // dimension size as 1, first offset as 0, and first count as 1
7945     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7946     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7947     MapValuesArrayTy CurStrides;
7948     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7949     uint64_t ElementTypeSize;
7950 
7951     // Collect Size information for each dimension and get the element size as
7952     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7953     // should be [10, 10] and the first stride is 4 btyes.
7954     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7955          Components) {
7956       const Expr *AssocExpr = Component.getAssociatedExpression();
7957       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7958 
7959       if (!OASE)
7960         continue;
7961 
7962       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7963       auto *CAT = Context.getAsConstantArrayType(Ty);
7964       auto *VAT = Context.getAsVariableArrayType(Ty);
7965 
7966       // We need all the dimension size except for the last dimension.
7967       assert((VAT || CAT || &Component == &*Components.begin()) &&
7968              "Should be either ConstantArray or VariableArray if not the "
7969              "first Component");
7970 
7971       // Get element size if CurStrides is empty.
7972       if (CurStrides.empty()) {
7973         const Type *ElementType = nullptr;
7974         if (CAT)
7975           ElementType = CAT->getElementType().getTypePtr();
7976         else if (VAT)
7977           ElementType = VAT->getElementType().getTypePtr();
7978         else
7979           assert(&Component == &*Components.begin() &&
7980                  "Only expect pointer (non CAT or VAT) when this is the "
7981                  "first Component");
7982         // If ElementType is null, then it means the base is a pointer
7983         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7984         // for next iteration.
7985         if (ElementType) {
7986           // For the case that having pointer as base, we need to remove one
7987           // level of indirection.
7988           if (&Component != &*Components.begin())
7989             ElementType = ElementType->getPointeeOrArrayElementType();
7990           ElementTypeSize =
7991               Context.getTypeSizeInChars(ElementType).getQuantity();
7992           CurStrides.push_back(
7993               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7994         }
7995       }
7996       // Get dimension value except for the last dimension since we don't need
7997       // it.
7998       if (DimSizes.size() < Components.size() - 1) {
7999         if (CAT)
8000           DimSizes.push_back(llvm::ConstantInt::get(
8001               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8002         else if (VAT)
8003           DimSizes.push_back(CGF.Builder.CreateIntCast(
8004               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8005               /*IsSigned=*/false));
8006       }
8007     }
8008 
8009     // Skip the dummy dimension since we have already have its information.
8010     auto DI = DimSizes.begin() + 1;
8011     // Product of dimension.
8012     llvm::Value *DimProd =
8013         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8014 
8015     // Collect info for non-contiguous. Notice that offset, count, and stride
8016     // are only meaningful for array-section, so we insert a null for anything
8017     // other than array-section.
8018     // Also, the size of offset, count, and stride are not the same as
8019     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8020     // count, and stride are the same as the number of non-contiguous
8021     // declaration in target update to/from clause.
8022     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8023          Components) {
8024       const Expr *AssocExpr = Component.getAssociatedExpression();
8025 
8026       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8027         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8028             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8029             /*isSigned=*/false);
8030         CurOffsets.push_back(Offset);
8031         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8032         CurStrides.push_back(CurStrides.back());
8033         continue;
8034       }
8035 
8036       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8037 
8038       if (!OASE)
8039         continue;
8040 
8041       // Offset
8042       const Expr *OffsetExpr = OASE->getLowerBound();
8043       llvm::Value *Offset = nullptr;
8044       if (!OffsetExpr) {
8045         // If offset is absent, then we just set it to zero.
8046         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8047       } else {
8048         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8049                                            CGF.Int64Ty,
8050                                            /*isSigned=*/false);
8051       }
8052       CurOffsets.push_back(Offset);
8053 
8054       // Count
8055       const Expr *CountExpr = OASE->getLength();
8056       llvm::Value *Count = nullptr;
8057       if (!CountExpr) {
8058         // In Clang, once a high dimension is an array section, we construct all
8059         // the lower dimension as array section, however, for case like
8060         // arr[0:2][2], Clang construct the inner dimension as an array section
8061         // but it actually is not in an array section form according to spec.
8062         if (!OASE->getColonLocFirst().isValid() &&
8063             !OASE->getColonLocSecond().isValid()) {
8064           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8065         } else {
8066           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8067           // When the length is absent it defaults to ⌈(size −
8068           // lower-bound)/stride⌉, where size is the size of the array
8069           // dimension.
8070           const Expr *StrideExpr = OASE->getStride();
8071           llvm::Value *Stride =
8072               StrideExpr
8073                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8074                                               CGF.Int64Ty, /*isSigned=*/false)
8075                   : nullptr;
8076           if (Stride)
8077             Count = CGF.Builder.CreateUDiv(
8078                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8079           else
8080             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8081         }
8082       } else {
8083         Count = CGF.EmitScalarExpr(CountExpr);
8084       }
8085       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8086       CurCounts.push_back(Count);
8087 
8088       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8089       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8090       //              Offset      Count     Stride
8091       //    D0          0           1         4    (int)    <- dummy dimension
8092       //    D1          0           2         8    (2 * (1) * 4)
8093       //    D2          1           2         20   (1 * (1 * 5) * 4)
8094       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8095       const Expr *StrideExpr = OASE->getStride();
8096       llvm::Value *Stride =
8097           StrideExpr
8098               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8099                                           CGF.Int64Ty, /*isSigned=*/false)
8100               : nullptr;
8101       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8102       if (Stride)
8103         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8104       else
8105         CurStrides.push_back(DimProd);
8106       if (DI != DimSizes.end())
8107         ++DI;
8108     }
8109 
8110     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8111     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8112     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8113   }
8114 
8115   /// Return the adjusted map modifiers if the declaration a capture refers to
8116   /// appears in a first-private clause. This is expected to be used only with
8117   /// directives that start with 'target'.
8118   MappableExprsHandler::OpenMPOffloadMappingFlags
8119   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8120     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8121 
8122     // A first private variable captured by reference will use only the
8123     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8124     // declaration is known as first-private in this handler.
8125     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8126       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8127           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8128         return MappableExprsHandler::OMP_MAP_ALWAYS |
8129                MappableExprsHandler::OMP_MAP_TO;
8130       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8131         return MappableExprsHandler::OMP_MAP_TO |
8132                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8133       return MappableExprsHandler::OMP_MAP_PRIVATE |
8134              MappableExprsHandler::OMP_MAP_TO;
8135     }
8136     return MappableExprsHandler::OMP_MAP_TO |
8137            MappableExprsHandler::OMP_MAP_FROM;
8138   }
8139 
8140   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8141     // Rotate by getFlagMemberOffset() bits.
8142     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8143                                                   << getFlagMemberOffset());
8144   }
8145 
8146   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8147                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8148     // If the entry is PTR_AND_OBJ but has not been marked with the special
8149     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8150     // marked as MEMBER_OF.
8151     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8152         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8153       return;
8154 
8155     // Reset the placeholder value to prepare the flag for the assignment of the
8156     // proper MEMBER_OF value.
8157     Flags &= ~OMP_MAP_MEMBER_OF;
8158     Flags |= MemberOfFlag;
8159   }
8160 
  /// Flatten the fields of \p RD (recursing into its non-empty bases) into
  /// \p Layout, in LLVM struct-layout order. \p AsBase selects the
  /// base-subobject LLVM type instead of the complete-object type.
  /// Bitfields and zero-size fields are not appended.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // Scratch array indexed by LLVM field number; each slot holds either the
    // base class or the field occupying that position in the LLVM struct.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Skip the slot if it was already filled above; the first occupant wins.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in LLVM layout order, flattening bases recursively and
    // appending fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8220 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array. Declarations contained in \a SkipVarSet
  /// are ignored entirely.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // The per-declaration buckets are visited in enumerator order below, so
    // 'Present' entries are emitted before 'Allocs' and 'Other'; 'Total' is
    // only used as the bucket count.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses. Appends a MapInfo for declaration \p D into the bucket selected
    // by \p Kind, lazily creating the Total-sized bucket array on first use.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect information from 'map' clauses. 'present' modifiers take
    // priority over the 'alloc' map type for bucket selection.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // Only attach the referenced expression when the clause has a valid
        // source location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect information from 'to' motion clauses, mapped as 'to'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect information from 'from' motion clauses, mapped as 'from'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Non-member pointer: emit a standalone RETURN_PARAM entry with a
          // zero size right away.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Handle each declaration at most once across all use_device_addr
        // clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Non-member address: emit a standalone RETURN_PARAM entry with a
          // zero size right away. Glvalues contribute their address; other
          // expressions contribute their scalar value.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected entries, one declaration at a time, so that all
    // component lists of a declaration are generated together.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8574 
8575 public:
8576   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8577       : CurDir(&Dir), CGF(CGF) {
8578     // Extract firstprivate clause information.
8579     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8580       for (const auto *D : C->varlists())
8581         FirstPrivateDecls.try_emplace(
8582             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8583     // Extract implicit firstprivates from uses_allocators clauses.
8584     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8585       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8586         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8587         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8588           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8589                                         /*Implicit=*/true);
8590         else if (const auto *VD = dyn_cast<VarDecl>(
8591                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8592                          ->getDecl()))
8593           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8594       }
8595     }
8596     // Extract device pointer clause information.
8597     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8598       for (auto L : C->component_lists())
8599         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8600   }
8601 
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor above, no clause information is
  /// gathered up front; only the directive itself is recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8605 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Result arrays the combined entry is appended to.
  /// \param CurTypes Map flags of the entries already emitted for this struct;
  ///        updated in place so they become MEMBER_OF the combined entry.
  /// \param PartialStruct Lowest/highest mapped elements and the struct base.
  /// \param VD The declaration the entry is generated for, may be null.
  /// \param NotTargetParams If true, the combined entry is not emitted as a
  ///        kernel argument (no TARGET_PARAM flag).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither MEMBER_OF anything nor an array section
    // does not need a combined parent entry; nothing to do.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds collapse to the record
    // base; the "+1 element" GEP below then makes the size span exactly one
    // full record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    // Cast both bounds to i8* so the pointer difference is in bytes.
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry (if
    // requested) is the kernel argument instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8662 
8663   /// Generate all the base pointers, section pointers, sizes, map types, and
8664   /// mappers for the extracted mappable expressions (all included in \a
8665   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8666   /// pair of the relevant declaration and index where it occurs is appended to
8667   /// the device pointers info array.
8668   void generateAllInfo(
8669       MapCombinedInfoTy &CombinedInfo,
8670       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8671           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8672     assert(CurDir.is<const OMPExecutableDirective *>() &&
8673            "Expect a executable directive");
8674     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8675     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8676   }
8677 
8678   /// Generate all the base pointers, section pointers, sizes, map types, and
8679   /// mappers for the extracted map clauses of user-defined mapper (all included
8680   /// in \a CombinedInfo).
8681   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8682     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8683            "Expect a declare mapper directive");
8684     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8685     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8686   }
8687 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object, emits one PTR_AND_OBJ entry for the captured
  /// 'this' (if present) and one per capture that is by-reference or of
  /// pointer type. Each emitted base pointer is recorded in \p LambdaPointers
  /// (keyed by the field address, mapped to the lambda object's address) so
  /// that adjustMemberOfForLambdaCaptures() can later fix MEMBER_OF indices.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects are handled here; bail out for anything else.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: base is the field inside the lambda,
      // pointer is the stored 'this' value.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      // Size of a pointer.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD shadows the enclosing parameter (the lambda variable).
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need to be mapped.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: map the loaded pointer value, size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8754 
8755   /// Set correct indices for lambdas captures.
8756   void adjustMemberOfForLambdaCaptures(
8757       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8758       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8759       MapFlagsArrayTy &Types) const {
8760     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8761       // Set correct member_of idx for all implicit lambda captures.
8762       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8763                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8764         continue;
8765       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8766       assert(BasePtr && "Unable to find base lambda address.");
8767       int TgtIdx = -1;
8768       for (unsigned J = I; J > 0; --J) {
8769         unsigned Idx = J - 1;
8770         if (Pointers[Idx] != BasePtr)
8771           continue;
8772         TgtIdx = Idx;
8773         break;
8774       }
8775       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8776       // All other current entries will be MEMBER_OF the combined entry
8777       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8778       // 0xFFFF in the MEMBER_OF field).
8779       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8780       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8781     }
8782   }
8783 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// Collects every map-clause component list that references the captured
  /// declaration, detects component lists that overlap (map different parts
  /// of the same struct), and emits map entries for all of them, filling
  /// \p PartialStruct with the struct range information when applicable.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // Only the pointer itself is transferred: size of a void*.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per clause component list that references VD:
    // (components, map type, map-type modifiers, is-implicit, mapper, expr).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists so that entries carrying the 'present' modifier sort
    // first and entries of map type 'alloc' sort last. Note the deliberate
    // LHS/RHS cross-read below: HasPresent tests LHS's modifiers while
    // HasAllocs tests RHS's map type (and vice versa for the *R variants),
    // which is what yields the present-first/alloc-last ordering.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Maps the "base" component list to every other list that shares a prefix
    // with it (i.e. maps a sub-object of the same storage).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L against every later list only (each pair is visited once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both component lists from the base expression outwards and
        // stop at the first position where they diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  .getNonReferenceType()
                  ->isPointerType())
            continue;
          // The shorter list is the base; the longer list is the overlapped
          // sub-object recorded against it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the record's fields in declaration/layout order so the
    // comparator below can order fields from different bases.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Find the first component (from the base) where the lists differ.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Different fields: order by field index within the same record,
            // otherwise by position in the precomputed plain layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9016 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to \a CombinedInfo; it is always marked
  /// TARGET_PARAM, and IMPLICIT unless the capture stems from an explicit
  /// firstprivate clause recorded in FirstPrivateDecls.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured 'this': map the pointed-to object 'tofrom'.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides the implicit flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variables are materialized as a global copy
        // registered with the runtime, and the copy is mapped instead.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the mapped pointer is the loaded value, not
          // the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9104 };
9105 } // anonymous namespace
9106 
/// Emit, for every non-contiguous mapped declaration, an on-stack array of
/// descriptor_dim structs (offset/count/stride per dimension, taken from
/// \p CombinedInfo.NonContigInfo) and store the array's address into the
/// corresponding slot of the offload pointers array in \p Info.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices inside descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // RevIdx walks the recorded per-dimension arrays in reverse, so the
      // emitted dims[] holds the dimensions in the opposite order from how
      // they were collected.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9174 
9175 /// Emit a string constant containing the names of the values mapped to the
9176 /// offloading runtime library.
9177 llvm::Constant *
9178 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9179                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9180   llvm::Constant *SrcLocStr;
9181   if (!MapExprs.getMapDecl()) {
9182     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9183   } else {
9184     std::string ExprName = "";
9185     if (MapExprs.getMapExpr()) {
9186       PrintingPolicy P(CGF.getContext().getLangOpts());
9187       llvm::raw_string_ostream OS(ExprName);
9188       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9189       OS.flush();
9190     } else {
9191       ExprName = MapExprs.getMapDecl()->getNameAsString();
9192     }
9193 
9194     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9195     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9196     const char *FileName = PLoc.getFilename();
9197     unsigned Line = PLoc.getLine();
9198     unsigned Column = PLoc.getColumn();
9199     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9200                                                 Line, Column);
9201   }
9202 
9203   return SrcLocStr;
9204 }
9205 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills \p Info with the per-region arrays consumed by the offloading
/// runtime: base pointers, section pointers, sizes, map types, map names
/// (only when debug info is requested) and user-defined mapper functions,
/// all derived from \p CombinedInfo. Data that is constant at compile time
/// (map types, map names, and sizes when no size needs runtime evaluation)
/// is emitted as private unnamed_addr globals; everything else is stored
/// into stack-allocated arrays element by element.
/// \param IsNonContiguous if true, sizes of non-contiguous entries hold the
/// dimension count instead of a byte size, and a non-contiguous descriptor
/// is emitted at the end.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Base pointers, pointers and mappers always live in stack arrays of
    // void* with one slot per captured entity.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Stack array; filled per element in the store loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // Non-contiguous entries encode the number of dimensions in the
          // size slot; the actual extents travel in the separate descriptor.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null i8* so the runtime skips name reporting.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One source-location/name string constant per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);

      llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(
              llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
              CombinedInfo.Exprs.size()),
          InfoMap);
      auto *MapNamesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), MapNamesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          MapNamesArrayInit,
          CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only emit the second global when clearing PRESENT actually changed
      // something; otherwise the begin array is reused for the end call.
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the runtime-populated arrays: store each base pointer, pointer,
    // (runtime-evaluated) size, and mapper function into its slot.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr/addr capture was stored so the body of
      // the region can load the translated device address from it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The non-contiguous descriptor is only needed when there is something to
  // describe.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9399 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, the map-types argument emitted is the one intended for the
  /// end of the region rather than its beginning.
  bool ForEndCall = false;

  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9408 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// Each out-parameter receives either a decayed pointer to the first element
/// of the corresponding array previously set up in \p Info, or a typed null
/// pointer when there is nothing to pass (no captures, no debug info for map
/// names, or no user-defined mapper).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each [N x T] array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For an end call, prefer the dedicated end map-types array (present
    // modifier stripped) when one was generated; otherwise reuse the begin
    // array.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing mapped: hand the runtime null pointers of the expected types.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9469 
/// Check for inner distribute directive.
///
/// Returns the 'distribute' directive nested directly inside \p D (or inside
/// a nested 'teams' when \p D is a plain 'target'); returns nullptr when no
/// such directive exists. Only target-family directives that may legally
/// enclose a distribute are inspected; any other directive kind reaching
/// here is a codegen invariant violation.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Look through a compound statement that wraps a single child.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain distribute directly, or 'teams' wrapping a
      // distribute one level deeper.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute.
      return nullptr;
    // Combined target+distribute forms never reach this query, and all
    // non-target directives are invalid inputs.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9577 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D the 'declare mapper' declaration to generate code for.
/// \param CGF if non-null, the function currently being emitted; used to
/// remember that this mapper was requested from it.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Mangle the mapped type into the function name so mappers for distinct
  // types never collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; the back-edge value is added after
  // the body is emitted.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so member components of this
  // mapper are attributed to the right parent entry.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decayed map types (the tofrom case arrives via the
    // fall-through edge from ToElseBB with MemberMapType unchanged).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // Remember which function requested this mapper so it can be re-emitted
  // where needed.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9853 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// Control continues at \p ExitBB whether or not the conditional
/// __tgt_push_mapper_component call is taken.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Initialization also fires for a pointer-with-object mapping where base
    // and begin differ, even for a single element.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialize only when deletion is NOT requested.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Delete only when deletion IS requested.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM |
                                   MappableExprsHandler::OMP_MAP_MEMBER_OF)));
  llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapNameArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9920 
9921 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9922     const OMPDeclareMapperDecl *D) {
9923   auto I = UDMMap.find(D);
9924   if (I != UDMMap.end())
9925     return I->second;
9926   emitUserDefinedMapper(D);
9927   return UDMMap.lookup(D);
9928 }
9929 
/// Emit a call to __kmpc_push_target_tripcount that communicates the loop
/// trip count of an enclosed teams-distribute loop to the runtime before the
/// target region is launched. Nothing is emitted when no such loop directive
/// can be found or when \p SizeEmitter yields no value.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  // Compute the trip count via the provided callback; it may return null, in
  // which case no runtime call is emitted.
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
9957 
/// Emit the offloading code for target directive \p D: build the offloading
/// arrays, launch the region on the device through the __tgt_target* runtime
/// entry points, and fall back to the host version \p OutlinedFn when
/// offloading fails or is not enabled.
///
/// \param OutlinedFn Host version of the target region; always required.
/// \param OutlinedFnID Unique ID the runtime uses to identify the target
///        region; may be null, in which case only the host version runs.
/// \param IfCond Condition of the 'if' clause, if present; when it evaluates
///        to false the host version is executed.
/// \param Device Expression and modifier of the 'device' clause, if present.
/// \param SizeEmitter Callback computing the loop trip count for loop-based
///        directives (see emitTargetNumIterationsCall).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend or nowait clause requires the target invocation to be wrapped in
  // an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values captured by the target region; they are passed to the
  // outlined host function and drive the construction of the map arrays.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: use the undefined device ID so the runtime picks
      // the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture the variables in the context of the outer task before
      // calling the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures and captured values in lockstep with the fields of the
    // captured record.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the array addresses to the surrounding scope so ThenGen can
    // pass them to the runtime launch call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10267 
/// Recursively scan the statement \p S for OpenMP target directives and emit
/// the corresponding device functions, using \p ParentName when building the
/// unique target entry names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple identifies the target entry uniquely.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target execution directive,
    // so reaching any of them here is a bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, scan its raw associated statement, if
  // there is one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10416 
10417 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10418   // If emitting code for the host, we do not process FD here. Instead we do
10419   // the normal code generation.
10420   if (!CGM.getLangOpts().OpenMPIsDevice) {
10421     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10422       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10423           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10424       // Do not emit device_type(nohost) functions for the host.
10425       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10426         return true;
10427     }
10428     return false;
10429   }
10430 
10431   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10432   // Try to detect target regions in the function.
10433   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10434     StringRef Name = CGM.getMangledName(GD);
10435     scanForTargetRegionsFunctions(FD->getBody(), Name);
10436     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10437         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10438     // Do not emit device_type(nohost) functions for the host.
10439     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10440       return true;
10441   }
10442 
10443   // Do not to emit function if it is not marked as declare target.
10444   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10445          AlreadyEmittedTargetDecls.count(VD) == 0;
10446 }
10447 
10448 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10449   if (!CGM.getLangOpts().OpenMPIsDevice)
10450     return false;
10451 
10452   // Check if there are Ctors/Dtors in this declaration and look for target
10453   // regions in it. We use the complete variant to produce the kernel name
10454   // mangling.
10455   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10456   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10457     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10458       StringRef ParentName =
10459           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10460       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10461     }
10462     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10463       StringRef ParentName =
10464           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10465       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10466     }
10467   }
10468 
10469   // Do not to emit variable if it is not marked as declare target.
10470   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10471       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10472           cast<VarDecl>(GD.getDecl()));
10473   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10474       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10475        HasRequiresUnifiedSharedMemory)) {
10476     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10477     return true;
10478   }
10479   return false;
10480 }
10481 
/// Get or create an internal global variable that holds the copy of the
/// constant firstprivate variable \p VD for a target region, register it as a
/// device global variable offloading entry, and return its address. The
/// global's name is derived from the device ID, file ID, variable name, and
/// line number, making it unique per target entry.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build the unique "__omp_offloading_firstprivate_..." name from the
    // target entry info of the variable's source location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  // Create the global in the default firstprivate address space and force
  // internal linkage on it.
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the global alive through optimizations and register it in the
  // offload entries table as a 'to' device global variable.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}
10515 
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to do if no device triples were requested and we are not
  // compiling device code ourselves.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'declare target to' without unified shared memory: the variable itself
    // is registered, under its mangled name and with its real size.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only entries are registered with a zero size.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal constant "<name>_ref" holding the variable's address
      // and keep it alive via addCompilerUsedGlobal, so the internal variable
      // is not optimized away on the device.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'declare target link', or 'to' with unified shared memory: a pointer to
    // the variable is registered instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is registered by name only.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    // The entry describes a pointer, hence pointer size and weak linkage.
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10586 
10587 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10588   if (isa<FunctionDecl>(GD.getDecl()) ||
10589       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10590     return emitTargetFunctions(GD);
10591 
10592   return emitTargetGlobalVariable(GD);
10593 }
10594 
10595 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10596   for (const VarDecl *VD : DeferredGlobalVariables) {
10597     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10598         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10599     if (!Res)
10600       continue;
10601     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10602         !HasRequiresUnifiedSharedMemory) {
10603       CGM.EmitGlobal(VD);
10604     } else {
10605       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10606               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10607                HasRequiresUnifiedSharedMemory)) &&
10608              "Expected link clause or to clause with unified memory.");
10609       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10610     }
10611   }
10612 }
10613 
10614 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10615     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10616   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10617          " Expected target-based directive.");
10618 }
10619 
10620 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10621   for (const OMPClause *Clause : D->clauselists()) {
10622     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10623       HasRequiresUnifiedSharedMemory = true;
10624     } else if (const auto *AC =
10625                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10626       switch (AC->getAtomicDefaultMemOrderKind()) {
10627       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10628         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10629         break;
10630       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10631         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10632         break;
10633       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10634         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10635         break;
10636       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10637         break;
10638       }
10639     }
10640   }
10641 }
10642 
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  // Ordering selected by an 'atomic_default_mem_order' requires clause, if
  // any (see processRequiresDirective).
  return RequiresAtomicOrdering;
}
10646 
10647 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10648                                                        LangAS &AS) {
10649   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10650     return false;
10651   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10652   switch(A->getAllocatorType()) {
10653   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10654   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10655   // Not supported, fallback to the default mem space.
10656   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10657   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10658   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10659   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10660   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10661   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10662   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10663     AS = LangAS::Default;
10664     return true;
10665   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10666     llvm_unreachable("Expected predefined allocator for the variables with the "
10667                      "static storage.");
10668   }
10669   return false;
10670 }
10671 
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  // True once a 'requires unified_shared_memory' clause has been processed
  // (see processRequiresDirective).
  return HasRequiresUnifiedSharedMemory;
}
10675 
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // On the device, temporarily disable marking declarations as global
  // targets; the saved value is restored by the destructor.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  // Restore the flag saved by the constructor (device compilation only).
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10689 
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or a disabled marking phase (DisableAutoDeclareTargetRAII),
  // never suppresses anything.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Report whether a non-declaration definition already exists in the
      // module under the mangled name.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record the declaration; insert().second is true only on the first
  // insertion, so this returns false exactly once per declaration.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10709 
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // The generated body is a single call: __tgt_register_requires(flags).
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10751 
10752 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10753                                     const OMPExecutableDirective &D,
10754                                     SourceLocation Loc,
10755                                     llvm::Function *OutlinedFn,
10756                                     ArrayRef<llvm::Value *> CapturedVars) {
10757   if (!CGF.HaveInsertPoint())
10758     return;
10759 
10760   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10761   CodeGenFunction::RunCleanupsScope Scope(CGF);
10762 
10763   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10764   llvm::Value *Args[] = {
10765       RTLoc,
10766       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10767       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10768   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10769   RealArgs.append(std::begin(Args), std::end(Args));
10770   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10771 
10772   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10773       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10774   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10775 }
10776 
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A null expression means the clause was absent; emit 0 in that case.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10805 
// Emits the runtime calls opening and closing a data environment
// (__tgt_target_data_begin_mapper / __tgt_target_data_end_mapper), with the
// region body emitted in between. When device pointer privatization is
// required, the body is emitted twice: privatized inside the 'then' branch
// and non-privatized in the 'else' branch of the if clause.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10959 
// Emits the single runtime call for a standalone target data directive
// ('target enter data', 'target exit data', 'target update'), choosing the
// (nowait) mapper entry point per directive kind and wrapping the call in an
// outer task when depend/nowait clauses are present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // No other directive kind may reach this function (see assert above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the generated arrays to the captures used by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11137 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless a clause says otherwise.
    ParamKindTy Kind = Vector;
    /// Linear stride or clause argument value, when applicable.
    llvm::APSInt StrideOrArg;
    /// Requested alignment; a zero value is treated as "not specified".
    llvm::APSInt Alignment;
  };
} // namespace
11148 
11149 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11150                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11151   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11152   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11153   // of that clause. The VLEN value must be power of 2.
11154   // In other case the notion of the function`s "characteristic data type" (CDT)
11155   // is used to compute the vector length.
11156   // CDT is defined in the following order:
11157   //   a) For non-void function, the CDT is the return type.
11158   //   b) If the function has any non-uniform, non-linear parameters, then the
11159   //   CDT is the type of the first such parameter.
11160   //   c) If the CDT determined by a) or b) above is struct, union, or class
11161   //   type which is pass-by-value (except for the type that maps to the
11162   //   built-in complex data type), the characteristic data type is int.
11163   //   d) If none of the above three cases is applicable, the CDT is int.
11164   // The VLEN is then determined based on the CDT and the size of vector
11165   // register of that ISA for which current vector version is generated. The
11166   // VLEN is computed using the formula below:
11167   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11168   // where vector register size specified in section 3.2.1 Registers and the
11169   // Stack Frame of original AMD64 ABI document.
11170   QualType RetType = FD->getReturnType();
11171   if (RetType.isNull())
11172     return 0;
11173   ASTContext &C = FD->getASTContext();
11174   QualType CDT;
11175   if (!RetType.isNull() && !RetType->isVoidType()) {
11176     CDT = RetType;
11177   } else {
11178     unsigned Offset = 0;
11179     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11180       if (ParamAttrs[Offset].Kind == Vector)
11181         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11182       ++Offset;
11183     }
11184     if (CDT.isNull()) {
11185       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11186         if (ParamAttrs[I + Offset].Kind == Vector) {
11187           CDT = FD->getParamDecl(I)->getType();
11188           break;
11189         }
11190       }
11191     }
11192   }
11193   if (CDT.isNull())
11194     CDT = C.IntTy;
11195   CDT = CDT->getCanonicalTypeUnqualified();
11196   if (CDT->isRecordType() || CDT->isUnionType())
11197     CDT = C.IntTy;
11198   return C.getTypeSize(CDT);
11199 }
11200 
11201 static void
11202 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11203                            const llvm::APSInt &VLENVal,
11204                            ArrayRef<ParamAttrTy> ParamAttrs,
11205                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11206   struct ISADataTy {
11207     char ISA;
11208     unsigned VecRegSize;
11209   };
11210   ISADataTy ISAData[] = {
11211       {
11212           'b', 128
11213       }, // SSE
11214       {
11215           'c', 256
11216       }, // AVX
11217       {
11218           'd', 256
11219       }, // AVX2
11220       {
11221           'e', 512
11222       }, // AVX512
11223   };
11224   llvm::SmallVector<char, 2> Masked;
11225   switch (State) {
11226   case OMPDeclareSimdDeclAttr::BS_Undefined:
11227     Masked.push_back('N');
11228     Masked.push_back('M');
11229     break;
11230   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11231     Masked.push_back('N');
11232     break;
11233   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11234     Masked.push_back('M');
11235     break;
11236   }
11237   for (char Mask : Masked) {
11238     for (const ISADataTy &Data : ISAData) {
11239       SmallString<256> Buffer;
11240       llvm::raw_svector_ostream Out(Buffer);
11241       Out << "_ZGV" << Data.ISA << Mask;
11242       if (!VLENVal) {
11243         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11244         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11245         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11246       } else {
11247         Out << VLENVal;
11248       }
11249       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11250         switch (ParamAttr.Kind){
11251         case LinearWithVarStride:
11252           Out << 's' << ParamAttr.StrideOrArg;
11253           break;
11254         case Linear:
11255           Out << 'l';
11256           if (ParamAttr.StrideOrArg != 1)
11257             Out << ParamAttr.StrideOrArg;
11258           break;
11259         case Uniform:
11260           Out << 'u';
11261           break;
11262         case Vector:
11263           Out << 'v';
11264           break;
11265         }
11266         if (!!ParamAttr.Alignment)
11267           Out << 'a' << ParamAttr.Alignment;
11268       }
11269       Out << '_' << Fn->getName();
11270       Fn->addFnAttr(Out.str());
11271     }
11272   }
11273 }
11274 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11280 
11281 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11282 ///
11283 /// TODO: Need to implement the behavior for reference marked with a
11284 /// var or no linear modifiers (1.b in the section). For this, we
11285 /// need to extend ParamKindTy to support the linear modifiers.
11286 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11287   QT = QT.getCanonicalType();
11288 
11289   if (QT->isVoidType())
11290     return false;
11291 
11292   if (Kind == ParamKindTy::Uniform)
11293     return false;
11294 
11295   if (Kind == ParamKindTy::Linear)
11296     return false;
11297 
11298   // TODO: Handle linear references with modifiers
11299 
11300   if (Kind == ParamKindTy::LinearWithVarStride)
11301     return false;
11302 
11303   return true;
11304 }
11305 
11306 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11307 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11308   QT = QT.getCanonicalType();
11309   unsigned Size = C.getTypeSize(QT);
11310 
11311   // Only scalars and complex within 16 bytes wide set PVB to true.
11312   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11313     return false;
11314 
11315   if (QT->isFloatingType())
11316     return true;
11317 
11318   if (QT->isIntegerType())
11319     return true;
11320 
11321   if (QT->isPointerType())
11322     return true;
11323 
11324   // TODO: Add support for complex types (section 3.1.2, item 2).
11325 
11326   return false;
11327 }
11328 
11329 /// Computes the lane size (LS) of a return type or of an input parameter,
11330 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11331 /// TODO: Add support for references, section 3.2.1, item 1.
11332 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11333   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11334     QualType PTy = QT.getCanonicalType()->getPointeeType();
11335     if (getAArch64PBV(PTy, C))
11336       return C.getTypeSize(PTy);
11337   }
11338   if (getAArch64PBV(QT, C))
11339     return C.getTypeSize(QT);
11340 
11341   return C.getTypeSize(C.getUIntPtrType());
11342 }
11343 
11344 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11345 // signature of the scalar function, as defined in 3.2.2 of the
11346 // AAVFABI.
11347 static std::tuple<unsigned, unsigned, bool>
11348 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11349   QualType RetType = FD->getReturnType().getCanonicalType();
11350 
11351   ASTContext &C = FD->getASTContext();
11352 
11353   bool OutputBecomesInput = false;
11354 
11355   llvm::SmallVector<unsigned, 8> Sizes;
11356   if (!RetType->isVoidType()) {
11357     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11358     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11359       OutputBecomesInput = true;
11360   }
11361   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11362     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11363     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11364   }
11365 
11366   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11367   // The LS of a function parameter / return value can only be a power
11368   // of 2, starting from 8 bits, up to 128.
11369   assert(std::all_of(Sizes.begin(), Sizes.end(),
11370                      [](unsigned Size) {
11371                        return Size == 8 || Size == 16 || Size == 32 ||
11372                               Size == 64 || Size == 128;
11373                      }) &&
11374          "Invalid size");
11375 
11376   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11377                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11378                          OutputBecomesInput);
11379 }
11380 
11381 /// Mangle the parameter part of the vector function name according to
11382 /// their OpenMP classification. The mangling function is defined in
11383 /// section 3.5 of the AAVFABI.
11384 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11385   SmallString<256> Buffer;
11386   llvm::raw_svector_ostream Out(Buffer);
11387   for (const auto &ParamAttr : ParamAttrs) {
11388     switch (ParamAttr.Kind) {
11389     case LinearWithVarStride:
11390       Out << "ls" << ParamAttr.StrideOrArg;
11391       break;
11392     case Linear:
11393       Out << 'l';
11394       // Don't print the step value if it is not present or if it is
11395       // equal to 1.
11396       if (ParamAttr.StrideOrArg != 1)
11397         Out << ParamAttr.StrideOrArg;
11398       break;
11399     case Uniform:
11400       Out << 'u';
11401       break;
11402     case Vector:
11403       Out << 'v';
11404       break;
11405     }
11406 
11407     if (!!ParamAttr.Alignment)
11408       Out << 'a' << ParamAttr.Alignment;
11409   }
11410 
11411   return std::string(Out.str());
11412 }
11413 
11414 // Function used to add the attribute. The parameter `VLEN` is
11415 // templated to allow the use of "x" when targeting scalable functions
11416 // for SVE.
11417 template <typename T>
11418 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11419                                  char ISA, StringRef ParSeq,
11420                                  StringRef MangledName, bool OutputBecomesInput,
11421                                  llvm::Function *Fn) {
11422   SmallString<256> Buffer;
11423   llvm::raw_svector_ostream Out(Buffer);
11424   Out << Prefix << ISA << LMask << VLEN;
11425   if (OutputBecomesInput)
11426     Out << "v";
11427   Out << ParSeq << "_" << MangledName;
11428   Fn->addFnAttr(Out.str());
11429 }
11430 
11431 // Helper function to generate the Advanced SIMD names depending on
11432 // the value of the NDS when simdlen is not present.
11433 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11434                                       StringRef Prefix, char ISA,
11435                                       StringRef ParSeq, StringRef MangledName,
11436                                       bool OutputBecomesInput,
11437                                       llvm::Function *Fn) {
11438   switch (NDS) {
11439   case 8:
11440     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11441                          OutputBecomesInput, Fn);
11442     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11443                          OutputBecomesInput, Fn);
11444     break;
11445   case 16:
11446     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11447                          OutputBecomesInput, Fn);
11448     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11449                          OutputBecomesInput, Fn);
11450     break;
11451   case 32:
11452     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11453                          OutputBecomesInput, Fn);
11454     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11455                          OutputBecomesInput, Fn);
11456     break;
11457   case 64:
11458   case 128:
11459     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11460                          OutputBecomesInput, Fn);
11461     break;
11462   default:
11463     llvm_unreachable("Scalar type is too wide.");
11464   }
11465 }
11466 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// Validates any user-provided `simdlen` (\p UserVLEN, 0 when absent)
/// against the constraints of the target ISA (\p ISA: 'n' for Advanced
/// SIMD, 's' for SVE), emitting a warning and bailing out on violations.
/// Otherwise attaches one or more "_ZGV"-mangled vector-variant
/// attributes to \p Fn, masked/unmasked according to \p State.
/// NOTE(review): \p VecRegSize is currently unused in this function.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked and masked variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11575 
11576 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11577                                               llvm::Function *Fn) {
11578   ASTContext &C = CGM.getContext();
11579   FD = FD->getMostRecentDecl();
11580   // Map params to their positions in function decl.
11581   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11582   if (isa<CXXMethodDecl>(FD))
11583     ParamPositions.try_emplace(FD, 0);
11584   unsigned ParamPos = ParamPositions.size();
11585   for (const ParmVarDecl *P : FD->parameters()) {
11586     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11587     ++ParamPos;
11588   }
11589   while (FD) {
11590     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11591       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11592       // Mark uniform parameters.
11593       for (const Expr *E : Attr->uniforms()) {
11594         E = E->IgnoreParenImpCasts();
11595         unsigned Pos;
11596         if (isa<CXXThisExpr>(E)) {
11597           Pos = ParamPositions[FD];
11598         } else {
11599           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11600                                 ->getCanonicalDecl();
11601           Pos = ParamPositions[PVD];
11602         }
11603         ParamAttrs[Pos].Kind = Uniform;
11604       }
11605       // Get alignment info.
11606       auto NI = Attr->alignments_begin();
11607       for (const Expr *E : Attr->aligneds()) {
11608         E = E->IgnoreParenImpCasts();
11609         unsigned Pos;
11610         QualType ParmTy;
11611         if (isa<CXXThisExpr>(E)) {
11612           Pos = ParamPositions[FD];
11613           ParmTy = E->getType();
11614         } else {
11615           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11616                                 ->getCanonicalDecl();
11617           Pos = ParamPositions[PVD];
11618           ParmTy = PVD->getType();
11619         }
11620         ParamAttrs[Pos].Alignment =
11621             (*NI)
11622                 ? (*NI)->EvaluateKnownConstInt(C)
11623                 : llvm::APSInt::getUnsigned(
11624                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11625                           .getQuantity());
11626         ++NI;
11627       }
11628       // Mark linear parameters.
11629       auto SI = Attr->steps_begin();
11630       auto MI = Attr->modifiers_begin();
11631       for (const Expr *E : Attr->linears()) {
11632         E = E->IgnoreParenImpCasts();
11633         unsigned Pos;
11634         // Rescaling factor needed to compute the linear parameter
11635         // value in the mangled name.
11636         unsigned PtrRescalingFactor = 1;
11637         if (isa<CXXThisExpr>(E)) {
11638           Pos = ParamPositions[FD];
11639         } else {
11640           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11641                                 ->getCanonicalDecl();
11642           Pos = ParamPositions[PVD];
11643           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11644             PtrRescalingFactor = CGM.getContext()
11645                                      .getTypeSizeInChars(P->getPointeeType())
11646                                      .getQuantity();
11647         }
11648         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11649         ParamAttr.Kind = Linear;
11650         // Assuming a stride of 1, for `linear` without modifiers.
11651         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11652         if (*SI) {
11653           Expr::EvalResult Result;
11654           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11655             if (const auto *DRE =
11656                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11657               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11658                 ParamAttr.Kind = LinearWithVarStride;
11659                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11660                     ParamPositions[StridePVD->getCanonicalDecl()]);
11661               }
11662             }
11663           } else {
11664             ParamAttr.StrideOrArg = Result.Val.getInt();
11665           }
11666         }
11667         // If we are using a linear clause on a pointer, we need to
11668         // rescale the value of linear_step with the byte size of the
11669         // pointee type.
11670         if (Linear == ParamAttr.Kind)
11671           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11672         ++SI;
11673         ++MI;
11674       }
11675       llvm::APSInt VLENVal;
11676       SourceLocation ExprLoc;
11677       const Expr *VLENExpr = Attr->getSimdlen();
11678       if (VLENExpr) {
11679         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11680         ExprLoc = VLENExpr->getExprLoc();
11681       }
11682       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11683       if (CGM.getTriple().isX86()) {
11684         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11685       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11686         unsigned VLEN = VLENVal.getExtValue();
11687         StringRef MangledName = Fn->getName();
11688         if (CGM.getTarget().hasFeature("sve"))
11689           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11690                                          MangledName, 's', 128, Fn, ExprLoc);
11691         if (CGM.getTarget().hasFeature("neon"))
11692           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11693                                          MangledName, 'n', 128, Fn, ExprLoc);
11694       }
11695     }
11696     FD = FD->getPreviousDecl();
11697   }
11698 }
11699 
namespace {
/// Cleanup action for doacross support.
///
/// Pushed on the EH scope stack (see emitDoacrossInit below) so the
/// doacross finalization runtime call is emitted on every exit path,
/// including the exceptional one.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments the finalization entry point takes
  /// (ident_t *loc, kmp_int32 gtid).
  static const int DoacrossFinArgs = 2;

private:
  /// Runtime finalization function to invoke from Emit().
  llvm::FunctionCallee RTLFn;
  /// Call arguments, copied at push time because the cleanup may run
  /// after the caller's originals have gone out of scope.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insertion point means this path is unreachable; emit nothing.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11724 
/// Emits the doacross initialization for loop directive \p D: builds an
/// on-stack array of `kmp_dim` descriptors (one per entry in
/// \p NumIterations, with `lo` left zero-initialized, `up` set to the
/// iteration count converted to kmp_int64, and `st` set to 1), calls
/// __kmpc_doacross_init, and pushes a cleanup that emits
/// __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (or reuse the cached) kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array, then fill in `up` and `st` per dim
  // (`lo` keeps its zero value from the null-initialization).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register finalization: __kmpc_doacross_fini(loc, gtid) runs on both
  // normal and exceptional exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11795 
/// Emits a doacross post/wait for an 'ordered' construct with a depend
/// clause \p C: the loop dependence vector is stored (converted to
/// kmp_int64) into a temporary array, then __kmpc_doacross_post is called
/// for depend(source) or __kmpc_doacross_wait for depend(sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop's counter value, widened/converted to kmp_int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Arguments: ident_t *loc, kmp_int32 gtid, kmp_int64 *vec.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
11828 
11829 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11830                                llvm::FunctionCallee Callee,
11831                                ArrayRef<llvm::Value *> Args) const {
11832   assert(Loc.isValid() && "Outlined function call location must be valid.");
11833   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11834 
11835   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11836     if (Fn->doesNotThrow()) {
11837       CGF.EmitNounwindRuntimeCall(Fn, Args);
11838       return;
11839     }
11840   }
11841   CGF.EmitRuntimeCall(Callee, Args);
11842 }
11843 
/// Emits a call to the outlined function \p OutlinedFn; delegates to
/// emitCall with the same location and arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11849 
11850 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11851   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11852     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11853       HasEmittedDeclareTargetRegion = true;
11854 }
11855 
/// Returns the address of \p NativeParam. In this default implementation
/// the native parameter's own local storage is used and \p TargetParam is
/// intentionally ignored.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11861 
/// Returns the address to use for local variable \p VD.
///
/// Handles two orthogonal concerns: (1) variables tracked for an untied
/// task get the address recorded in the untied-locals stack; (2) variables
/// with an 'omp allocate' attribute are allocated through __kmpc_alloc
/// (with a __kmpc_free cleanup on region exit) instead of the default
/// alloca. Returns Address::invalid() when the default mechanism should
/// be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up addresses recorded for this variable in the enclosing untied
  // task frame, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Allocate via the runtime, then cast the raw pointer to the
    // variable's type.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, also publish the allocated address into the
    // task-private slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw-encoded SourceLocation, decoded again in Emit().
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator).
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11964 
11965 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11966                                              const VarDecl *VD) const {
11967   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11968   if (It == FunctionToUntiedTaskStackMap.end())
11969     return false;
11970   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11971 }
11972 
11973 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11974     CodeGenModule &CGM, const OMPLoopDirective &S)
11975     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11976   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11977   if (!NeedToPush)
11978     return;
11979   NontemporalDeclsSet &DS =
11980       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11981   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11982     for (const Stmt *Ref : C->private_refs()) {
11983       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11984       const ValueDecl *VD;
11985       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11986         VD = DRE->getDecl();
11987       } else {
11988         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11989         assert((ME->isImplicitCXXThis() ||
11990                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11991                "Expected member of current class.");
11992         VD = ME->getMemberDecl();
11993       }
11994       DS.insert(VD);
11995     }
11996   }
11997 }
11998 
11999 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12000   if (!NeedToPush)
12001     return;
12002   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12003 }
12004 
12005 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12006     CodeGenFunction &CGF,
12007     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
12008                          std::pair<Address, Address>> &LocalVars)
12009     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12010   if (!NeedToPush)
12011     return;
12012   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12013       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12014   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12015 }
12016 
12017 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12018   if (!NeedToPush)
12019     return;
12020   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12021 }
12022 
12023 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12024   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12025 
12026   return llvm::any_of(
12027       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12028       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12029 }
12030 
12031 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12032     const OMPExecutableDirective &S,
12033     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12034     const {
12035   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12036   // Vars in target/task regions must be excluded completely.
12037   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12038       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12039     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12040     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12041     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12042     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12043       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12044         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12045     }
12046   }
12047   // Exclude vars in private clauses.
12048   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12049     for (const Expr *Ref : C->varlists()) {
12050       if (!Ref->getType()->isScalarType())
12051         continue;
12052       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12053       if (!DRE)
12054         continue;
12055       NeedToCheckForLPCs.insert(DRE->getDecl());
12056     }
12057   }
12058   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12059     for (const Expr *Ref : C->varlists()) {
12060       if (!Ref->getType()->isScalarType())
12061         continue;
12062       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12063       if (!DRE)
12064         continue;
12065       NeedToCheckForLPCs.insert(DRE->getDecl());
12066     }
12067   }
12068   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12069     for (const Expr *Ref : C->varlists()) {
12070       if (!Ref->getType()->isScalarType())
12071         continue;
12072       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12073       if (!DRE)
12074         continue;
12075       NeedToCheckForLPCs.insert(DRE->getDecl());
12076     }
12077   }
12078   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12079     for (const Expr *Ref : C->varlists()) {
12080       if (!Ref->getType()->isScalarType())
12081         continue;
12082       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12083       if (!DRE)
12084         continue;
12085       NeedToCheckForLPCs.insert(DRE->getDecl());
12086     }
12087   }
12088   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12089     for (const Expr *Ref : C->varlists()) {
12090       if (!Ref->getType()->isScalarType())
12091         continue;
12092       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12093       if (!DRE)
12094         continue;
12095       NeedToCheckForLPCs.insert(DRE->getDecl());
12096     }
12097   }
12098   for (const Decl *VD : NeedToCheckForLPCs) {
12099     for (const LastprivateConditionalData &Data :
12100          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12101       if (Data.DeclToUniqueName.count(VD) > 0) {
12102         if (!Data.Disabled)
12103           NeedToAddForLPCsAsDisabled.insert(VD);
12104         break;
12105       }
12106     }
12107   }
12108 }
12109 
12110 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12111     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12112     : CGM(CGF.CGM),
12113       Action((CGM.getLangOpts().OpenMP >= 50 &&
12114               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12115                            [](const OMPLastprivateClause *C) {
12116                              return C->getKind() ==
12117                                     OMPC_LASTPRIVATE_conditional;
12118                            }))
12119                  ? ActionToDo::PushAsLastprivateConditional
12120                  : ActionToDo::DoNotPush) {
12121   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12122   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12123     return;
12124   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12125          "Expected a push action.");
12126   LastprivateConditionalData &Data =
12127       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12128   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12129     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12130       continue;
12131 
12132     for (const Expr *Ref : C->varlists()) {
12133       Data.DeclToUniqueName.insert(std::make_pair(
12134           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12135           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12136     }
12137   }
12138   Data.IVLVal = IVLVal;
12139   Data.Fn = CGF.CurFn;
12140 }
12141 
12142 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12143     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12144     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12145   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12146   if (CGM.getLangOpts().OpenMP < 50)
12147     return;
12148   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12149   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12150   if (!NeedToAddForLPCsAsDisabled.empty()) {
12151     Action = ActionToDo::DisableLastprivateConditional;
12152     LastprivateConditionalData &Data =
12153         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12154     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12155       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12156     Data.Fn = CGF.CurFn;
12157     Data.Disabled = true;
12158   }
12159 }
12160 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory for the "disable inner analysis" flavor of the RAII: the
  // two-argument constructor pushes a Disabled entry (or nothing) for S.
  return LastprivateConditionalRAII(CGF, S);
}
12166 
12167 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12168   if (CGM.getLangOpts().OpenMP < 50)
12169     return;
12170   if (Action == ActionToDo::DisableLastprivateConditional) {
12171     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12172            "Expected list of disabled private vars.");
12173     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12174   }
12175   if (Action == ActionToDo::PushAsLastprivateConditional) {
12176     assert(
12177         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12178         "Expected list of lastprivate conditional vars.");
12179     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12180   }
12181 }
12182 
12183 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12184                                                         const VarDecl *VD) {
12185   ASTContext &C = CGM.getContext();
12186   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12187   if (I == LastprivateConditionalToTypes.end())
12188     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12189   QualType NewType;
12190   const FieldDecl *VDField;
12191   const FieldDecl *FiredField;
12192   LValue BaseLVal;
12193   auto VI = I->getSecond().find(VD);
12194   if (VI == I->getSecond().end()) {
12195     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12196     RD->startDefinition();
12197     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12198     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12199     RD->completeDefinition();
12200     NewType = C.getRecordType(RD);
12201     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12202     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12203     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12204   } else {
12205     NewType = std::get<0>(VI->getSecond());
12206     VDField = std::get<1>(VI->getSecond());
12207     FiredField = std::get<2>(VI->getSecond());
12208     BaseLVal = std::get<3>(VI->getSecond());
12209   }
12210   LValue FiredLVal =
12211       CGF.EmitLValueForField(BaseLVal, FiredField);
12212   CGF.EmitStoreOfScalar(
12213       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12214       FiredLVal);
12215   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12216 }
12217 
12218 namespace {
12219 /// Checks if the lastprivate conditional variable is referenced in LHS.
12220 class LastprivateConditionalRefChecker final
12221     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12222   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12223   const Expr *FoundE = nullptr;
12224   const Decl *FoundD = nullptr;
12225   StringRef UniqueDeclName;
12226   LValue IVLVal;
12227   llvm::Function *FoundFn = nullptr;
12228   SourceLocation Loc;
12229 
12230 public:
12231   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12232     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12233          llvm::reverse(LPM)) {
12234       auto It = D.DeclToUniqueName.find(E->getDecl());
12235       if (It == D.DeclToUniqueName.end())
12236         continue;
12237       if (D.Disabled)
12238         return false;
12239       FoundE = E;
12240       FoundD = E->getDecl()->getCanonicalDecl();
12241       UniqueDeclName = It->second;
12242       IVLVal = D.IVLVal;
12243       FoundFn = D.Fn;
12244       break;
12245     }
12246     return FoundE == E;
12247   }
12248   bool VisitMemberExpr(const MemberExpr *E) {
12249     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12250       return false;
12251     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12252          llvm::reverse(LPM)) {
12253       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12254       if (It == D.DeclToUniqueName.end())
12255         continue;
12256       if (D.Disabled)
12257         return false;
12258       FoundE = E;
12259       FoundD = E->getMemberDecl()->getCanonicalDecl();
12260       UniqueDeclName = It->second;
12261       IVLVal = D.IVLVal;
12262       FoundFn = D.Fn;
12263       break;
12264     }
12265     return FoundE == E;
12266   }
12267   bool VisitStmt(const Stmt *S) {
12268     for (const Stmt *Child : S->children()) {
12269       if (!Child)
12270         continue;
12271       if (const auto *E = dyn_cast<Expr>(Child))
12272         if (!E->isGLValue())
12273           continue;
12274       if (Visit(Child))
12275         return true;
12276     }
12277     return false;
12278   }
12279   explicit LastprivateConditionalRefChecker(
12280       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12281       : LPM(LPM) {}
12282   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12283   getFoundData() const {
12284     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12285   }
12286 };
12287 } // namespace
12288 
/// Emits the update of the tracking globals for a lastprivate conditional
/// variable: internal globals '<UniqueDeclName>.iv' (last writing iteration)
/// and '<UniqueDeclName>' (last value) are overwritten when the current
/// iteration is >= the recorded one. The update runs inside a critical
/// region named after the variable, except in simd-only mode where no
/// parallelism can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Signedness of the comparison follows the IV type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12375 
12376 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12377                                                          const Expr *LHS) {
12378   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12379     return;
12380   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12381   if (!Checker.Visit(LHS))
12382     return;
12383   const Expr *FoundE;
12384   const Decl *FoundD;
12385   StringRef UniqueDeclName;
12386   LValue IVLVal;
12387   llvm::Function *FoundFn;
12388   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12389       Checker.getFoundData();
12390   if (FoundFn != CGF.CurFn) {
12391     // Special codegen for inner parallel regions.
12392     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12393     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12394     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12395            "Lastprivate conditional is not found in outer region.");
12396     QualType StructTy = std::get<0>(It->getSecond());
12397     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12398     LValue PrivLVal = CGF.EmitLValue(FoundE);
12399     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12400         PrivLVal.getAddress(CGF),
12401         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12402     LValue BaseLVal =
12403         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12404     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12405     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12406                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12407                         FiredLVal, llvm::AtomicOrdering::Unordered,
12408                         /*IsVolatile=*/true, /*isInit=*/false);
12409     return;
12410   }
12411 
12412   // Private address of the lastprivate conditional in the current context.
12413   // priv_a
12414   LValue LVal = CGF.EmitLValue(FoundE);
12415   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12416                                    FoundE->getExprLoc());
12417 }
12418 
12419 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12420     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12421     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12422   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12423     return;
12424   auto Range = llvm::reverse(LastprivateConditionalStack);
12425   auto It = llvm::find_if(
12426       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12427   if (It == Range.end() || It->Fn != CGF.CurFn)
12428     return;
12429   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12430   assert(LPCI != LastprivateConditionalToTypes.end() &&
12431          "Lastprivates must be registered already.");
12432   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12433   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12434   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12435   for (const auto &Pair : It->DeclToUniqueName) {
12436     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12437     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12438       continue;
12439     auto I = LPCI->getSecond().find(Pair.first);
12440     assert(I != LPCI->getSecond().end() &&
12441            "Lastprivate must be rehistered already.");
12442     // bool Cmp = priv_a.Fired != 0;
12443     LValue BaseLVal = std::get<3>(I->getSecond());
12444     LValue FiredLVal =
12445         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12446     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12447     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12448     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12449     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12450     // if (Cmp) {
12451     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12452     CGF.EmitBlock(ThenBB);
12453     Address Addr = CGF.GetAddrOfLocalVar(VD);
12454     LValue LVal;
12455     if (VD->getType()->isReferenceType())
12456       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12457                                            AlignmentSource::Decl);
12458     else
12459       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12460                                 AlignmentSource::Decl);
12461     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12462                                      D.getBeginLoc());
12463     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12464     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12465     // }
12466   }
12467 }
12468 
12469 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12470     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12471     SourceLocation Loc) {
12472   if (CGF.getLangOpts().OpenMP < 50)
12473     return;
12474   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12475   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12476          "Unknown lastprivate conditional variable.");
12477   StringRef UniqueName = It->second;
12478   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12479   // The variable was not updated in the region - exit.
12480   if (!GV)
12481     return;
12482   LValue LPLVal = CGF.MakeAddrLValue(
12483       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12484   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12485   CGF.EmitStoreOfScalar(Res, PrivLVal);
12486 }
12487 
// Outlining a 'parallel' region needs the full OpenMP runtime; it must never
// be reached in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12493 
// Outlining a 'teams' region needs the full OpenMP runtime; unreachable in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12499 
// Outlining a 'task' body needs the full OpenMP runtime; unreachable in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12507 
// Calling into a parallel region requires the OpenMP runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12515 
// 'critical' needs runtime locks; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12522 
// 'master' needs runtime thread-id checks; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12528 
// 'taskyield' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12533 
// 'taskgroup' needs runtime bookkeeping; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12539 
// 'single' (incl. copyprivate) needs the runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12547 
// 'ordered' needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12554 
// Barriers are runtime calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12562 
// Dynamic loop scheduling needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12569 
// Static loop scheduling needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12575 
// 'distribute' scheduling needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12581 
// Ordered-iteration bookkeeping is a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12588 
// Static-schedule teardown is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12594 
// Fetching the next dynamic chunk is a runtime call; unreachable in SIMD-only
// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12602 
// 'num_threads' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12608 
// 'proc_bind' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12614 
// Threadprivate storage needs the runtime; unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12621 
// Threadprivate definitions need the runtime; unreachable in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12627 
// Artificial threadprivates need the runtime; unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12632 
// 'flush' lowers to a runtime call here; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12639 
// Task creation needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12648 
// 'taskloop' needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12655 
// Reductions in SIMD-only mode are limited to the "simple" (serial) form,
// which needs no runtime calls; delegate that case to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12664 
// Task reductions require the OpenMP runtime; not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12670 
// Task reductions require the OpenMP runtime; not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12676 
// Task reductions require the OpenMP runtime; not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12683 
// Task reductions require the OpenMP runtime; not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12690 
// 'taskwait' requires the OpenMP runtime; not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12695 
// Cancellation points require the OpenMP runtime; not supported in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12701 
// 'cancel' requires the OpenMP runtime; not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12707 
// Target (offloading) constructs are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12714 
// Target (offloading) constructs are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12724 
// Target (offloading) codegen is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12728 
// Target (offloading) codegen is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12732 
// Always returns false: in SIMD-only mode no target-specific handling is
// performed for globals (presumably signalling "not handled here" to the
// caller — confirm against the base-class contract).
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12736 
// 'teams' requires the OpenMP runtime; not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12744 
// 'num_teams'/'thread_limit' clauses require the OpenMP runtime; not supported
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12751 
// 'target data' constructs are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12757 
// Stand-alone target data directives (e.g. enter/exit data, update) are not
// supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12763 
// Doacross loop support requires the OpenMP runtime; not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12769 
// Doacross 'ordered' support requires the OpenMP runtime; not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12774 
// Parameter translation is only needed for device outlining; not supported in
// SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12780 
// Parameter address mapping is only needed for device outlining; not supported
// in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12787