1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
/// API for generation of expressions captured in an innermost OpenMP
/// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
446 enum OpenMPLocationFlags : unsigned {
447   /// Use trampoline for internal microtask.
448   OMP_IDENT_IMD = 0x01,
449   /// Use c-style ident structure.
450   OMP_IDENT_KMPC = 0x02,
451   /// Atomic reduction option for kmpc_reduce.
452   OMP_ATOMIC_REDUCE = 0x10,
453   /// Explicit 'barrier' directive.
454   OMP_IDENT_BARRIER_EXPL = 0x20,
455   /// Implicit barrier in code.
456   OMP_IDENT_BARRIER_IMPL = 0x40,
457   /// Implicit barrier in 'for' directive.
458   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459   /// Implicit barrier in 'sections' directive.
460   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461   /// Implicit barrier in 'single' directive.
462   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463   /// Call of __kmp_for_static_init for static loop.
464   OMP_IDENT_WORK_LOOP = 0x200,
465   /// Call of __kmp_for_static_init for sections.
466   OMP_IDENT_WORK_SECTIONS = 0x400,
467   /// Call of __kmp_for_static_init for distribute.
468   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470 };
471 
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474 /// Values for bit flags for marking which requires clauses have been used.
475 enum OpenMPOffloadingRequiresDirFlags : int64_t {
476   /// flag undefined.
477   OMP_REQ_UNDEFINED               = 0x000,
478   /// no requires clause present.
479   OMP_REQ_NONE                    = 0x001,
480   /// reverse_offload clause.
481   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482   /// unified_address clause.
483   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484   /// unified_shared_memory clause.
485   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486   /// dynamic_allocators clause.
487   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489 };
490 
491 enum OpenMPOffloadingReservedDeviceIDs {
492   /// Device ID if the device was not defined, runtime should get it
493   /// from environment variables in the spec.
494   OMP_DEVICEID_UNDEF = -1,
495 };
496 } // anonymous namespace
497 
498 /// Describes ident structure that describes a source location.
499 /// All descriptions are taken from
500 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
501 /// Original structure:
502 /// typedef struct ident {
503 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
504 ///                                  see above  */
505 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
506 ///                                  KMP_IDENT_KMPC identifies this union
507 ///                                  member  */
508 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
509 ///                                  see above */
510 ///#if USE_ITT_BUILD
511 ///                            /*  but currently used for storing
512 ///                                region-specific ITT */
513 ///                            /*  contextual information. */
514 ///#endif /* USE_ITT_BUILD */
515 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
516 ///                                 C++  */
517 ///    char const *psource;    /**< String describing the source location.
518 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
520 ///                            the function and a pair of line numbers that
521 ///                            delimit the construct.
522 ///                             */
523 /// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538 
539 /// Schedule types for 'omp for' loops (these enumerators are taken from
540 /// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) schedules.

  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' schedules.

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Schedule used by default: the unordered static schedule.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.

  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.

  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
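/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the declare-reduction initializer path; otherwise \p Init is emitted
/// directly into each element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. The original array at
/// \p SrcAddr is only traversed when this is non-null.
/// \param SrcAddr Address of the original array.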
668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669                                  QualType Type, bool EmitDeclareReductionInit,
670                                  const Expr *Init,
671                                  const OMPDeclareReductionDecl *DRD,
672                                  Address SrcAddr = Address::invalid()) {
673   // Perform element-by-element initialization.
674   QualType ElementTy;
675 
676   // Drill down to the base element type on both arrays.
677   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679   DestAddr =
680       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681   if (DRD)
682     SrcAddr =
683         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684 
685   llvm::Value *SrcBegin = nullptr;
686   if (DRD)
687     SrcBegin = SrcAddr.getPointer();
688   llvm::Value *DestBegin = DestAddr.getPointer();
689   // Cast from pointer to array type to pointer to single element.
690   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691   // The basic structure here is a while-do loop.
692   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
694   llvm::Value *IsEmpty =
695       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697 
698   // Enter the loop body, making that address the current address.
699   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700   CGF.EmitBlock(BodyBB);
701 
702   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703 
704   llvm::PHINode *SrcElementPHI = nullptr;
705   Address SrcElementCurrent = Address::invalid();
706   if (DRD) {
707     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708                                           "omp.arraycpy.srcElementPast");
709     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710     SrcElementCurrent =
711         Address(SrcElementPHI,
712                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713   }
714   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716   DestElementPHI->addIncoming(DestBegin, EntryBB);
717   Address DestElementCurrent =
718       Address(DestElementPHI,
719               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720 
721   // Emit copy.
722   {
723     CodeGenFunction::RunCleanupsScope InitScope(CGF);
724     if (EmitDeclareReductionInit) {
725       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726                                        SrcElementCurrent, ElementTy);
727     } else
728       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729                            /*IsInitializer=*/false);
730   }
731 
732   if (DRD) {
733     // Shift the address forward by one element.
734     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737   }
738 
739   // Shift the address forward by one element.
740   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742   // Check whether we've reached the end.
743   llvm::Value *Done =
744       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747 
748   // Done.
749   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750 }
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
/// Emits the initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is bitcast to the memory
///        type of the private variable before use.
/// \param SharedLVal  LValue of the shared item; it is rebuilt with the type,
///        base info and TBAA info of the recorded shared lvalue for item N.
/// \param DefaultInit Callback invoked on each path below. Its result is only
///        consulted on the last path, where a true result suppresses emission
///        of the private variable's own initializer.
///
/// Requires that the shared lvalue for item N has already been generated.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: element-wise initialization. DefaultInit is only invoked
    // when the declare-reduction has an initializer; its result is ignored.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item initialized through the declare-reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returned false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
946                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
947                           llvm::Value *Addr) {
948   Address Tmp = Address::invalid();
949   Address TopTmp = Address::invalid();
950   Address MostTopTmp = Address::invalid();
951   BaseTy = BaseTy.getNonReferenceType();
952   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
953          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
954     Tmp = CGF.CreateMemTemp(BaseTy);
955     if (TopTmp.isValid())
956       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
957     else
958       MostTopTmp = Tmp;
959     TopTmp = Tmp;
960     BaseTy = BaseTy->getPointeeType();
961   }
962   llvm::Type *Ty = BaseLVType;
963   if (Tmp.isValid())
964     Ty = Tmp.getElementType();
965   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
966   if (Tmp.isValid()) {
967     CGF.Builder.CreateStore(Addr, Tmp);
968     return MostTopTmp;
969   }
970   return Address(Addr, BaseLVAlignment);
971 }
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1063                                  StringRef Separator)
1064     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1065       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1066   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1067 
1068   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1069   OMPBuilder.initialize();
1070   loadOffloadInfoMetadata();
1071 }
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emits the internal helper function for a user-defined reduction's combiner
/// or initializer and returns it.
///
/// \param Ty                  Reduction type; both parameters are restrict
///                            pointers to it.
/// \param CombinerInitializer Expression emitted (result ignored) as the
///                            function body; may be null.
/// \param In                  Variable mapped to the pointee of the second
///                            parameter.
/// \param Out                 Variable mapped to the pointee of the first
///                            parameter.
/// \param IsCombiner          Selects the "omp_combiner" or "omp_initializer"
///                            name; for initializers, a non-trivial
///                            initializer of \p Out is emitted first.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // The "out" parameter comes first, the "in" parameter second.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimization enabled, swap noinline/optnone for alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helpers first run the non-trivial initializer of Out, writing
  // to the address now registered for Out.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
/// Emits the outlined function for the task (or taskloop) captured region of
/// directive \p D and returns it.
///
/// \param ThreadIDVar   Thread-id variable; must NOT be of pointer type.
/// \param PartIDVar     Passed to the untied-task action.
/// \param TaskTVar      Pointer-typed variable whose pointee is passed to
///                      __kmpc_omp_task by the untied-task callback.
/// \param Tied          Whether the task is tied.
/// \param NumberOfParts Written only for untied tasks, with the part count
///                      reported by the action.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds checked below can report a cancel construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // NumberOfParts is left untouched for tied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Returns the OpenMP global thread id for the function currently being
/// emitted by \p CGF.
///
/// When the OpenMPIRBuilder is enabled the query is delegated to the builder.
/// Otherwise the value is obtained, in order of preference, from the
/// per-function cache, from the thread-id variable of the enclosing OpenMP
/// region, or from a call to __kmpc_global_thread_num emitted at the
/// function's service insert point; that call is cached for later queries.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread-id variable only if at least one of these holds:
      // no landing pad is required, exceptions are disabled, we are emitting
      // into the entry block, the variable's pointer is not an instruction,
      // or that instruction lives in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insert point the call is emitted at.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard restores the builder's insert point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
/// Emits the constructor/destructor helpers for the threadprivate variable
/// \p VD and registers them with the OpenMP runtime.
///
/// \param VD          The threadprivate variable; its definition is used.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit When true (and compiling C++), a helper that re-emits
///                    the variable's initializer is generated.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function; otherwise a standalone init function is
///                    created.
/// \returns The standalone init function when one was created (\p CGF is
///          null and a ctor or dtor is needed); nullptr in every other case,
///          including when TLS is used, when \p VD has no definition, or when
///          the variable was already processed.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when threadprivates are implemented with TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each variable (keyed by mangled name) only once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The single void* parameter receives the address of the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The single void* parameter receives the address of the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Start the function with an empty debug location.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A missing ctor or dtor is passed as a typed null function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration calls in a
    // dedicated nullary init function and hand it back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869 
/// Emits and registers the offload constructor/destructor entries for the
/// declare target variable \p VD stored in \p Addr.
///
/// \param VD          The declare target variable; its definition is used.
/// \param Addr        The global variable holding \p VD.
/// \param PerformInit When true (and compiling C++), a "_ctor" entry is
///                    created for the variable's initializer.
/// \returns false when there are no offload targets and this is not a device
///          compilation; otherwise the value of the OpenMPIsDevice language
///          option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // No entries are emitted for non-declare-target variables, 'link'
  // variables, or 'to' variables under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (keyed by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common name prefix; Out is scratch storage for the full
  // entry names built below.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits the declaration's initializer into the
      // declare target variable's storage (Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Start the function with an empty debug location, then use an
      // artificial location for its body.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, a private constant i8 global named "<prefix>_ctor"
      // stands in for the constructor and serves as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits the destructor call for the declare
      // target variable's storage (Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host, a private constant i8 global named "<prefix>_dtor"
      // stands in for the destructor and serves as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056                                        llvm::Function *OutlinedFn,
2057                                        ArrayRef<llvm::Value *> CapturedVars,
2058                                        const Expr *IfCond) {
2059   if (!CGF.HaveInsertPoint())
2060     return;
2061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062   auto &M = CGM.getModule();
2063   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2065     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *Args[] = {
2068         RTLoc,
2069         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072     RealArgs.append(std::begin(Args), std::end(Args));
2073     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074 
2075     llvm::FunctionCallee RTLFn =
2076         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078   };
2079   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083     // Build calls:
2084     // __kmpc_serialized_parallel(&Loc, GTid);
2085     llvm::Value *Args[] = {RTLoc, ThreadID};
2086     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087                             M, OMPRTL___kmpc_serialized_parallel),
2088                         Args);
2089 
2090     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092     Address ZeroAddrBound =
2093         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094                                          /*Name=*/".bound.zero.addr");
2095     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097     // ThreadId for serialized parallels is 0.
2098     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101 
2102     // Ensure we do not inline the function. This is trivially true for the ones
2103     // passed to __kmpc_fork_call but the ones calles in serialized regions
2104     // could be inlined. This is not a perfect but it is closer to the invariant
2105     // we want, namely, every data environment starts with a new function.
2106     // TODO: We should pass the if condition to the runtime function and do the
2107     //       handling there. Much cleaner code.
2108     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2109     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2110 
2111     // __kmpc_end_serialized_parallel(&Loc, GTid);
2112     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2113     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2114                             M, OMPRTL___kmpc_end_serialized_parallel),
2115                         EndArgs);
2116   };
2117   if (IfCond) {
2118     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2119   } else {
2120     RegionCodeGenTy ThenRCG(ThenGen);
2121     ThenRCG(CGF);
2122   }
2123 }
2124 
2125 // If we're inside an (outlined) parallel region, use the region info's
2126 // thread-ID variable (it is passed in a first argument of the outlined function
2127 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2128 // regular serial code region, get thread ID by calling kmp_int32
2129 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2130 // return the address of that temp.
2131 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2132                                              SourceLocation Loc) {
2133   if (auto *OMPRegionInfo =
2134           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2135     if (OMPRegionInfo->getThreadIDVariable())
2136       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2137 
2138   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2139   QualType Int32Ty =
2140       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2141   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2142   CGF.EmitStoreOfScalar(ThreadID,
2143                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2144 
2145   return ThreadIDTemp;
2146 }
2147 
2148 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2149     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2150   SmallString<256> Buffer;
2151   llvm::raw_svector_ostream Out(Buffer);
2152   Out << Name;
2153   StringRef RuntimeName = Out.str();
2154   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2155   if (Elem.second) {
2156     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2157            "OMP internal variable has different type than requested");
2158     return &*Elem.second;
2159   }
2160 
2161   return Elem.second = new llvm::GlobalVariable(
2162              CGM.getModule(), Ty, /*IsConstant*/ false,
2163              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2164              Elem.first(), /*InsertBefore=*/nullptr,
2165              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2166 }
2167 
2168 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2169   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2170   std::string Name = getName({Prefix, "var"});
2171   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2172 }
2173 
2174 namespace {
2175 /// Common pre(post)-action for different OpenMP constructs.
2176 class CommonActionTy final : public PrePostActionTy {
2177   llvm::FunctionCallee EnterCallee;
2178   ArrayRef<llvm::Value *> EnterArgs;
2179   llvm::FunctionCallee ExitCallee;
2180   ArrayRef<llvm::Value *> ExitArgs;
2181   bool Conditional;
2182   llvm::BasicBlock *ContBlock = nullptr;
2183 
2184 public:
2185   CommonActionTy(llvm::FunctionCallee EnterCallee,
2186                  ArrayRef<llvm::Value *> EnterArgs,
2187                  llvm::FunctionCallee ExitCallee,
2188                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2189       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2190         ExitArgs(ExitArgs), Conditional(Conditional) {}
2191   void Enter(CodeGenFunction &CGF) override {
2192     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2193     if (Conditional) {
2194       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2195       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2196       ContBlock = CGF.createBasicBlock("omp_if.end");
2197       // Generate the branch (If-stmt)
2198       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2199       CGF.EmitBlock(ThenBlock);
2200     }
2201   }
2202   void Done(CodeGenFunction &CGF) {
2203     // Emit the rest of blocks/branches
2204     CGF.EmitBranch(ContBlock);
2205     CGF.EmitBlock(ContBlock, true);
2206   }
2207   void Exit(CodeGenFunction &CGF) override {
2208     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2209   }
2210 };
2211 } // anonymous namespace
2212 
2213 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2214                                          StringRef CriticalName,
2215                                          const RegionCodeGenTy &CriticalOpGen,
2216                                          SourceLocation Loc, const Expr *Hint) {
2217   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2218   // CriticalOpGen();
2219   // __kmpc_end_critical(ident_t *, gtid, Lock);
2220   // Prepare arguments and build a call to __kmpc_critical
2221   if (!CGF.HaveInsertPoint())
2222     return;
2223   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2224                          getCriticalRegionLock(CriticalName)};
2225   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2226                                                 std::end(Args));
2227   if (Hint) {
2228     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2229         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2230   }
2231   CommonActionTy Action(
2232       OMPBuilder.getOrCreateRuntimeFunction(
2233           CGM.getModule(),
2234           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2235       EnterArgs,
2236       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2237                                             OMPRTL___kmpc_end_critical),
2238       Args);
2239   CriticalOpGen.setAction(Action);
2240   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2241 }
2242 
2243 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2244                                        const RegionCodeGenTy &MasterOpGen,
2245                                        SourceLocation Loc) {
2246   if (!CGF.HaveInsertPoint())
2247     return;
2248   // if(__kmpc_master(ident_t *, gtid)) {
2249   //   MasterOpGen();
2250   //   __kmpc_end_master(ident_t *, gtid);
2251   // }
2252   // Prepare arguments and build a call to __kmpc_master
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2254   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2255                             CGM.getModule(), OMPRTL___kmpc_master),
2256                         Args,
2257                         OMPBuilder.getOrCreateRuntimeFunction(
2258                             CGM.getModule(), OMPRTL___kmpc_end_master),
2259                         Args,
2260                         /*Conditional=*/true);
2261   MasterOpGen.setAction(Action);
2262   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2263   Action.Done(CGF);
2264 }
2265 
2266 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2267                                         SourceLocation Loc) {
2268   if (!CGF.HaveInsertPoint())
2269     return;
2270   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2271     OMPBuilder.createTaskyield(CGF.Builder);
2272   } else {
2273     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2274     llvm::Value *Args[] = {
2275         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2276         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2277     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2278                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2279                         Args);
2280   }
2281 
2282   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2283     Region->emitUntiedSwitch(CGF);
2284 }
2285 
2286 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2287                                           const RegionCodeGenTy &TaskgroupOpGen,
2288                                           SourceLocation Loc) {
2289   if (!CGF.HaveInsertPoint())
2290     return;
2291   // __kmpc_taskgroup(ident_t *, gtid);
2292   // TaskgroupOpGen();
2293   // __kmpc_end_taskgroup(ident_t *, gtid);
2294   // Prepare arguments and build a call to __kmpc_taskgroup
2295   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2298                         Args,
2299                         OMPBuilder.getOrCreateRuntimeFunction(
2300                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2301                         Args);
2302   TaskgroupOpGen.setAction(Action);
2303   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2304 }
2305 
2306 /// Given an array of pointers to variables, project the address of a
2307 /// given variable.
2308 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2309                                       unsigned Index, const VarDecl *Var) {
2310   // Pull out the pointer to the variable.
2311   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2312   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2313 
2314   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2315   Addr = CGF.Builder.CreateElementBitCast(
2316       Addr, CGF.ConvertTypeForMem(Var->getType()));
2317   return Addr;
2318 }
2319 
2320 static llvm::Value *emitCopyprivateCopyFunction(
2321     CodeGenModule &CGM, llvm::Type *ArgsType,
2322     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2323     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2324     SourceLocation Loc) {
2325   ASTContext &C = CGM.getContext();
2326   // void copy_func(void *LHSArg, void *RHSArg);
2327   FunctionArgList Args;
2328   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2329                            ImplicitParamDecl::Other);
2330   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2331                            ImplicitParamDecl::Other);
2332   Args.push_back(&LHSArg);
2333   Args.push_back(&RHSArg);
2334   const auto &CGFI =
2335       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2336   std::string Name =
2337       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2338   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2339                                     llvm::GlobalValue::InternalLinkage, Name,
2340                                     &CGM.getModule());
2341   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2342   Fn->setDoesNotRecurse();
2343   CodeGenFunction CGF(CGM);
2344   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2345   // Dest = (void*[n])(LHSArg);
2346   // Src = (void*[n])(RHSArg);
2347   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2348       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2349       ArgsType), CGF.getPointerAlign());
2350   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2351       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2352       ArgsType), CGF.getPointerAlign());
2353   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2354   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2355   // ...
2356   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2357   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2358     const auto *DestVar =
2359         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2360     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2361 
2362     const auto *SrcVar =
2363         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2364     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2365 
2366     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2367     QualType Type = VD->getType();
2368     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2369   }
2370   CGF.FinishFunction();
2371   return Fn;
2372 }
2373 
2374 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2375                                        const RegionCodeGenTy &SingleOpGen,
2376                                        SourceLocation Loc,
2377                                        ArrayRef<const Expr *> CopyprivateVars,
2378                                        ArrayRef<const Expr *> SrcExprs,
2379                                        ArrayRef<const Expr *> DstExprs,
2380                                        ArrayRef<const Expr *> AssignmentOps) {
2381   if (!CGF.HaveInsertPoint())
2382     return;
2383   assert(CopyprivateVars.size() == SrcExprs.size() &&
2384          CopyprivateVars.size() == DstExprs.size() &&
2385          CopyprivateVars.size() == AssignmentOps.size());
2386   ASTContext &C = CGM.getContext();
2387   // int32 did_it = 0;
2388   // if(__kmpc_single(ident_t *, gtid)) {
2389   //   SingleOpGen();
2390   //   __kmpc_end_single(ident_t *, gtid);
2391   //   did_it = 1;
2392   // }
2393   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2394   // <copy_func>, did_it);
2395 
2396   Address DidIt = Address::invalid();
2397   if (!CopyprivateVars.empty()) {
2398     // int32 did_it = 0;
2399     QualType KmpInt32Ty =
2400         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2401     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2402     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2403   }
2404   // Prepare arguments and build a call to __kmpc_single
2405   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2406   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2407                             CGM.getModule(), OMPRTL___kmpc_single),
2408                         Args,
2409                         OMPBuilder.getOrCreateRuntimeFunction(
2410                             CGM.getModule(), OMPRTL___kmpc_end_single),
2411                         Args,
2412                         /*Conditional=*/true);
2413   SingleOpGen.setAction(Action);
2414   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2415   if (DidIt.isValid()) {
2416     // did_it = 1;
2417     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2418   }
2419   Action.Done(CGF);
2420   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2421   // <copy_func>, did_it);
2422   if (DidIt.isValid()) {
2423     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2424     QualType CopyprivateArrayTy = C.getConstantArrayType(
2425         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2426         /*IndexTypeQuals=*/0);
2427     // Create a list of all private variables for copyprivate.
2428     Address CopyprivateList =
2429         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2430     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2431       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2432       CGF.Builder.CreateStore(
2433           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2434               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2435               CGF.VoidPtrTy),
2436           Elem);
2437     }
2438     // Build function that copies private values from single region to all other
2439     // threads in the corresponding parallel region.
2440     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2441         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2442         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2443     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2444     Address CL =
2445       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2446                                                       CGF.VoidPtrTy);
2447     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2448     llvm::Value *Args[] = {
2449         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2450         getThreadID(CGF, Loc),        // i32 <gtid>
2451         BufSize,                      // size_t <buf_size>
2452         CL.getPointer(),              // void *<copyprivate list>
2453         CpyFn,                        // void (*) (void *, void *) <copy_func>
2454         DidItVal                      // i32 did_it
2455     };
2456     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2457                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2458                         Args);
2459   }
2460 }
2461 
2462 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2463                                         const RegionCodeGenTy &OrderedOpGen,
2464                                         SourceLocation Loc, bool IsThreads) {
2465   if (!CGF.HaveInsertPoint())
2466     return;
2467   // __kmpc_ordered(ident_t *, gtid);
2468   // OrderedOpGen();
2469   // __kmpc_end_ordered(ident_t *, gtid);
2470   // Prepare arguments and build a call to __kmpc_ordered
2471   if (IsThreads) {
2472     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2473     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2474                               CGM.getModule(), OMPRTL___kmpc_ordered),
2475                           Args,
2476                           OMPBuilder.getOrCreateRuntimeFunction(
2477                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2478                           Args);
2479     OrderedOpGen.setAction(Action);
2480     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2481     return;
2482   }
2483   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2484 }
2485 
2486 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2487   unsigned Flags;
2488   if (Kind == OMPD_for)
2489     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2490   else if (Kind == OMPD_sections)
2491     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2492   else if (Kind == OMPD_single)
2493     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2494   else if (Kind == OMPD_barrier)
2495     Flags = OMP_IDENT_BARRIER_EXPL;
2496   else
2497     Flags = OMP_IDENT_BARRIER_IMPL;
2498   return Flags;
2499 }
2500 
2501 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2502     CodeGenFunction &CGF, const OMPLoopDirective &S,
2503     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2504   // Check if the loop directive is actually a doacross loop directive. In this
2505   // case choose static, 1 schedule.
2506   if (llvm::any_of(
2507           S.getClausesOfKind<OMPOrderedClause>(),
2508           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2509     ScheduleKind = OMPC_SCHEDULE_static;
2510     // Chunk size is 1 in this case.
2511     llvm::APInt ChunkSize(32, 1);
2512     ChunkExpr = IntegerLiteral::Create(
2513         CGF.getContext(), ChunkSize,
2514         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2515         SourceLocation());
2516   }
2517 }
2518 
2519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2520                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2521                                       bool ForceSimpleCall) {
2522   // Check if we should use the OMPBuilder
2523   auto *OMPRegionInfo =
2524       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2525   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2526     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2527         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2528     return;
2529   }
2530 
2531   if (!CGF.HaveInsertPoint())
2532     return;
2533   // Build call __kmpc_cancel_barrier(loc, thread_id);
2534   // Build call __kmpc_barrier(loc, thread_id);
2535   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2536   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2537   // thread_id);
2538   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2539                          getThreadID(CGF, Loc)};
2540   if (OMPRegionInfo) {
2541     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2542       llvm::Value *Result = CGF.EmitRuntimeCall(
2543           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2544                                                 OMPRTL___kmpc_cancel_barrier),
2545           Args);
2546       if (EmitChecks) {
2547         // if (__kmpc_cancel_barrier()) {
2548         //   exit from construct;
2549         // }
2550         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2551         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2552         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2553         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2554         CGF.EmitBlock(ExitBB);
2555         //   exit from construct;
2556         CodeGenFunction::JumpDest CancelDestination =
2557             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2558         CGF.EmitBranchThroughCleanup(CancelDestination);
2559         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2560       }
2561       return;
2562     }
2563   }
2564   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2565                           CGM.getModule(), OMPRTL___kmpc_barrier),
2566                       Args);
2567 }
2568 
2569 /// Map the OpenMP loop schedule to the runtime enumeration.
2570 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2571                                           bool Chunked, bool Ordered) {
2572   switch (ScheduleKind) {
2573   case OMPC_SCHEDULE_static:
2574     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2575                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2576   case OMPC_SCHEDULE_dynamic:
2577     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2578   case OMPC_SCHEDULE_guided:
2579     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2580   case OMPC_SCHEDULE_runtime:
2581     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2582   case OMPC_SCHEDULE_auto:
2583     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2584   case OMPC_SCHEDULE_unknown:
2585     assert(!Chunked && "chunk was specified but schedule kind not known");
2586     return Ordered ? OMP_ord_static : OMP_sch_static;
2587   }
2588   llvm_unreachable("Unexpected runtime schedule");
2589 }
2590 
2591 /// Map the OpenMP distribute schedule to the runtime enumeration.
2592 static OpenMPSchedType
2593 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2594   // only static is allowed for dist_schedule
2595   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2596 }
2597 
2598 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2599                                          bool Chunked) const {
2600   OpenMPSchedType Schedule =
2601       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2602   return Schedule == OMP_sch_static;
2603 }
2604 
2605 bool CGOpenMPRuntime::isStaticNonchunked(
2606     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2607   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2608   return Schedule == OMP_dist_sch_static;
2609 }
2610 
2611 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2612                                       bool Chunked) const {
2613   OpenMPSchedType Schedule =
2614       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2615   return Schedule == OMP_sch_static_chunked;
2616 }
2617 
2618 bool CGOpenMPRuntime::isStaticChunked(
2619     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2620   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2621   return Schedule == OMP_dist_sch_static_chunked;
2622 }
2623 
2624 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2625   OpenMPSchedType Schedule =
2626       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2627   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2628   return Schedule != OMP_sch_static;
2629 }
2630 
2631 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2632                                   OpenMPScheduleClauseModifier M1,
2633                                   OpenMPScheduleClauseModifier M2) {
2634   int Modifier = 0;
2635   switch (M1) {
2636   case OMPC_SCHEDULE_MODIFIER_monotonic:
2637     Modifier = OMP_sch_modifier_monotonic;
2638     break;
2639   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2640     Modifier = OMP_sch_modifier_nonmonotonic;
2641     break;
2642   case OMPC_SCHEDULE_MODIFIER_simd:
2643     if (Schedule == OMP_sch_static_chunked)
2644       Schedule = OMP_sch_static_balanced_chunked;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_last:
2647   case OMPC_SCHEDULE_MODIFIER_unknown:
2648     break;
2649   }
2650   switch (M2) {
2651   case OMPC_SCHEDULE_MODIFIER_monotonic:
2652     Modifier = OMP_sch_modifier_monotonic;
2653     break;
2654   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2655     Modifier = OMP_sch_modifier_nonmonotonic;
2656     break;
2657   case OMPC_SCHEDULE_MODIFIER_simd:
2658     if (Schedule == OMP_sch_static_chunked)
2659       Schedule = OMP_sch_static_balanced_chunked;
2660     break;
2661   case OMPC_SCHEDULE_MODIFIER_last:
2662   case OMPC_SCHEDULE_MODIFIER_unknown:
2663     break;
2664   }
2665   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2666   // If the static schedule kind is specified or if the ordered clause is
2667   // specified, and if the nonmonotonic modifier is not specified, the effect is
2668   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2669   // modifier is specified, the effect is as if the nonmonotonic modifier is
2670   // specified.
2671   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2672     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2673           Schedule == OMP_sch_static_balanced_chunked ||
2674           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2675           Schedule == OMP_dist_sch_static_chunked ||
2676           Schedule == OMP_dist_sch_static))
2677       Modifier = OMP_sch_modifier_nonmonotonic;
2678   }
2679   return Schedule | Modifier;
2680 }
2681 
2682 void CGOpenMPRuntime::emitForDispatchInit(
2683     CodeGenFunction &CGF, SourceLocation Loc,
2684     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2685     bool Ordered, const DispatchRTInput &DispatchValues) {
2686   if (!CGF.HaveInsertPoint())
2687     return;
2688   OpenMPSchedType Schedule = getRuntimeSchedule(
2689       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2690   assert(Ordered ||
2691          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2692           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2693           Schedule != OMP_sch_static_balanced_chunked));
2694   // Call __kmpc_dispatch_init(
2695   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2696   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2697   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2698 
2699   // If the Chunk was not specified in the clause - use default value 1.
2700   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2701                                             : CGF.Builder.getIntN(IVSize, 1);
2702   llvm::Value *Args[] = {
2703       emitUpdateLocation(CGF, Loc),
2704       getThreadID(CGF, Loc),
2705       CGF.Builder.getInt32(addMonoNonMonoModifier(
2706           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2707       DispatchValues.LB,                                     // Lower
2708       DispatchValues.UB,                                     // Upper
2709       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2710       Chunk                                                  // Chunk
2711   };
2712   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2713 }
2714 
2715 static void emitForStaticInitCall(
2716     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2717     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2718     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2719     const CGOpenMPRuntime::StaticRTInput &Values) {
2720   if (!CGF.HaveInsertPoint())
2721     return;
2722 
2723   assert(!Values.Ordered);
2724   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2725          Schedule == OMP_sch_static_balanced_chunked ||
2726          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2727          Schedule == OMP_dist_sch_static ||
2728          Schedule == OMP_dist_sch_static_chunked);
2729 
2730   // Call __kmpc_for_static_init(
2731   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2732   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2733   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2734   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2735   llvm::Value *Chunk = Values.Chunk;
2736   if (Chunk == nullptr) {
2737     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2738             Schedule == OMP_dist_sch_static) &&
2739            "expected static non-chunked schedule");
2740     // If the Chunk was not specified in the clause - use default value 1.
2741     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2742   } else {
2743     assert((Schedule == OMP_sch_static_chunked ||
2744             Schedule == OMP_sch_static_balanced_chunked ||
2745             Schedule == OMP_ord_static_chunked ||
2746             Schedule == OMP_dist_sch_static_chunked) &&
2747            "expected static chunked schedule");
2748   }
2749   llvm::Value *Args[] = {
2750       UpdateLocation,
2751       ThreadId,
2752       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2753                                                   M2)), // Schedule type
2754       Values.IL.getPointer(),                           // &isLastIter
2755       Values.LB.getPointer(),                           // &LB
2756       Values.UB.getPointer(),                           // &UB
2757       Values.ST.getPointer(),                           // &Stride
2758       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2759       Chunk                                             // Chunk
2760   };
2761   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2762 }
2763 
2764 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2765                                         SourceLocation Loc,
2766                                         OpenMPDirectiveKind DKind,
2767                                         const OpenMPScheduleTy &ScheduleKind,
2768                                         const StaticRTInput &Values) {
2769   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2770       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2771   assert(isOpenMPWorksharingDirective(DKind) &&
2772          "Expected loop-based or sections-based directive.");
2773   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2774                                              isOpenMPLoopDirective(DKind)
2775                                                  ? OMP_IDENT_WORK_LOOP
2776                                                  : OMP_IDENT_WORK_SECTIONS);
2777   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2778   llvm::FunctionCallee StaticInitFunction =
2779       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2780   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2781   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2782                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2783 }
2784 
2785 void CGOpenMPRuntime::emitDistributeStaticInit(
2786     CodeGenFunction &CGF, SourceLocation Loc,
2787     OpenMPDistScheduleClauseKind SchedKind,
2788     const CGOpenMPRuntime::StaticRTInput &Values) {
2789   OpenMPSchedType ScheduleNum =
2790       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2791   llvm::Value *UpdatedLocation =
2792       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2793   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2794   llvm::FunctionCallee StaticInitFunction =
2795       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2796   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2797                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2798                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2799 }
2800 
2801 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2802                                           SourceLocation Loc,
2803                                           OpenMPDirectiveKind DKind) {
2804   if (!CGF.HaveInsertPoint())
2805     return;
2806   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2807   llvm::Value *Args[] = {
2808       emitUpdateLocation(CGF, Loc,
2809                          isOpenMPDistributeDirective(DKind)
2810                              ? OMP_IDENT_WORK_DISTRIBUTE
2811                              : isOpenMPLoopDirective(DKind)
2812                                    ? OMP_IDENT_WORK_LOOP
2813                                    : OMP_IDENT_WORK_SECTIONS),
2814       getThreadID(CGF, Loc)};
2815   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2816   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2817                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2818                       Args);
2819 }
2820 
2821 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2822                                                  SourceLocation Loc,
2823                                                  unsigned IVSize,
2824                                                  bool IVSigned) {
2825   if (!CGF.HaveInsertPoint())
2826     return;
2827   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2828   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2829   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2830 }
2831 
2832 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2833                                           SourceLocation Loc, unsigned IVSize,
2834                                           bool IVSigned, Address IL,
2835                                           Address LB, Address UB,
2836                                           Address ST) {
2837   // Call __kmpc_dispatch_next(
2838   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2839   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2840   //          kmp_int[32|64] *p_stride);
2841   llvm::Value *Args[] = {
2842       emitUpdateLocation(CGF, Loc),
2843       getThreadID(CGF, Loc),
2844       IL.getPointer(), // &isLastIter
2845       LB.getPointer(), // &Lower
2846       UB.getPointer(), // &Upper
2847       ST.getPointer()  // &Stride
2848   };
2849   llvm::Value *Call =
2850       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2851   return CGF.EmitScalarConversion(
2852       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2853       CGF.getContext().BoolTy, Loc);
2854 }
2855 
2856 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2857                                            llvm::Value *NumThreads,
2858                                            SourceLocation Loc) {
2859   if (!CGF.HaveInsertPoint())
2860     return;
2861   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2862   llvm::Value *Args[] = {
2863       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2864       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2865   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2866                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2867                       Args);
2868 }
2869 
2870 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2871                                          ProcBindKind ProcBind,
2872                                          SourceLocation Loc) {
2873   if (!CGF.HaveInsertPoint())
2874     return;
2875   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2876   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2877   llvm::Value *Args[] = {
2878       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2879       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2880   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2881                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2882                       Args);
2883 }
2884 
2885 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2886                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2887   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2888     OMPBuilder.createFlush(CGF.Builder);
2889   } else {
2890     if (!CGF.HaveInsertPoint())
2891       return;
2892     // Build call void __kmpc_flush(ident_t *loc)
2893     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2894                             CGM.getModule(), OMPRTL___kmpc_flush),
2895                         emitUpdateLocation(CGF, Loc));
2896   }
2897 }
2898 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The values are spelled out explicitly; they are identical to the implicit
/// numbering (consecutive, starting at zero).
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine = 1,
  /// Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// Function with call of destructors for private variables.
  Data1 = 3,
  /// Task priority.
  Data2 = 4,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound = 5,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound = 6,
  /// (Taskloops only) Stride.
  KmpTaskTStride = 7,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter = 8,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2924 
2925 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2926   return OffloadEntriesTargetRegion.empty() &&
2927          OffloadEntriesDeviceGlobalVar.empty();
2928 }
2929 
2930 /// Initialize target region entry.
2931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2932     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2933                                     StringRef ParentName, unsigned LineNum,
2934                                     unsigned Order) {
2935   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2936                                              "only required for the device "
2937                                              "code generation.");
2938   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2939       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2940                                    OMPTargetRegionEntryTargetRegion);
2941   ++OffloadingEntriesNum;
2942 }
2943 
2944 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2945     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2946                                   StringRef ParentName, unsigned LineNum,
2947                                   llvm::Constant *Addr, llvm::Constant *ID,
2948                                   OMPTargetRegionEntryKind Flags) {
2949   // If we are emitting code for a target, the entry is already initialized,
2950   // only has to be registered.
2951   if (CGM.getLangOpts().OpenMPIsDevice) {
2952     // This could happen if the device compilation is invoked standalone.
2953     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2954       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2955                                       OffloadingEntriesNum);
2956     auto &Entry =
2957         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2958     Entry.setAddress(Addr);
2959     Entry.setID(ID);
2960     Entry.setFlags(Flags);
2961   } else {
2962     if (Flags ==
2963             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2964         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2965                                  /*IgnoreAddressId*/ true))
2966       return;
2967     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2968            "Target region entry already registered!");
2969     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2970     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2971     ++OffloadingEntriesNum;
2972   }
2973 }
2974 
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2976     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2977     bool IgnoreAddressId) const {
2978   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2979   if (PerDevice == OffloadEntriesTargetRegion.end())
2980     return false;
2981   auto PerFile = PerDevice->second.find(FileID);
2982   if (PerFile == PerDevice->second.end())
2983     return false;
2984   auto PerParentName = PerFile->second.find(ParentName);
2985   if (PerParentName == PerFile->second.end())
2986     return false;
2987   auto PerLine = PerParentName->second.find(LineNum);
2988   if (PerLine == PerParentName->second.end())
2989     return false;
2990   // Fail if this entry is already registered.
2991   if (!IgnoreAddressId &&
2992       (PerLine->second.getAddress() || PerLine->second.getID()))
2993     return false;
2994   return true;
2995 }
2996 
2997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2998     const OffloadTargetRegionEntryInfoActTy &Action) {
2999   // Scan all target region entries and perform the provided action.
3000   for (const auto &D : OffloadEntriesTargetRegion)
3001     for (const auto &F : D.second)
3002       for (const auto &P : F.second)
3003         for (const auto &L : P.second)
3004           Action(D.first, F.first, P.first(), L.first, L.second);
3005 }
3006 
3007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3008     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3009                                        OMPTargetGlobalVarEntryKind Flags,
3010                                        unsigned Order) {
3011   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3012                                              "only required for the device "
3013                                              "code generation.");
3014   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3015   ++OffloadingEntriesNum;
3016 }
3017 
3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3019     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3020                                      CharUnits VarSize,
3021                                      OMPTargetGlobalVarEntryKind Flags,
3022                                      llvm::GlobalValue::LinkageTypes Linkage) {
3023   if (CGM.getLangOpts().OpenMPIsDevice) {
3024     // This could happen if the device compilation is invoked standalone.
3025     if (!hasDeviceGlobalVarEntryInfo(VarName))
3026       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3027     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3028     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3029            "Resetting with the new address.");
3030     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3031       if (Entry.getVarSize().isZero()) {
3032         Entry.setVarSize(VarSize);
3033         Entry.setLinkage(Linkage);
3034       }
3035       return;
3036     }
3037     Entry.setVarSize(VarSize);
3038     Entry.setLinkage(Linkage);
3039     Entry.setAddress(Addr);
3040   } else {
3041     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3042       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3043       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3044              "Entry not initialized!");
3045       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3046              "Resetting with the new address.");
3047       if (Entry.getVarSize().isZero()) {
3048         Entry.setVarSize(VarSize);
3049         Entry.setLinkage(Linkage);
3050       }
3051       return;
3052     }
3053     OffloadEntriesDeviceGlobalVar.try_emplace(
3054         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3055     ++OffloadingEntriesNum;
3056   }
3057 }
3058 
3059 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3060     actOnDeviceGlobalVarEntriesInfo(
3061         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3062   // Scan all target region entries and perform the provided action.
3063   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3064     Action(E.getKey(), E.getValue());
3065 }
3066 
3067 void CGOpenMPRuntime::createOffloadEntry(
3068     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3069     llvm::GlobalValue::LinkageTypes Linkage) {
3070   StringRef Name = Addr->getName();
3071   llvm::Module &M = CGM.getModule();
3072   llvm::LLVMContext &C = M.getContext();
3073 
3074   // Create constant string with the name.
3075   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3076 
3077   std::string StringName = getName({"omp_offloading", "entry_name"});
3078   auto *Str = new llvm::GlobalVariable(
3079       M, StrPtrInit->getType(), /*isConstant=*/true,
3080       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3081   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3082 
3083   llvm::Constant *Data[] = {
3084       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3085       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3086       llvm::ConstantInt::get(CGM.SizeTy, Size),
3087       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3088       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3089   std::string EntryName = getName({"omp_offloading", "entry", ""});
3090   llvm::GlobalVariable *Entry = createGlobalStruct(
3091       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3092       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3093 
3094   // The entry has to be created in the section the linker expects it to be.
3095   Entry->setSection("omp_offloading_entries");
3096 }
3097 
3098 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3099   // Emit the offloading entries and metadata so that the device codegen side
3100   // can easily figure out what to emit. The produced metadata looks like
3101   // this:
3102   //
3103   // !omp_offload.info = !{!1, ...}
3104   //
3105   // Right now we only generate metadata for function that contain target
3106   // regions.
3107 
3108   // If we are in simd mode or there are no entries, we don't need to do
3109   // anything.
3110   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3111     return;
3112 
3113   llvm::Module &M = CGM.getModule();
3114   llvm::LLVMContext &C = M.getContext();
3115   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3116                          SourceLocation, StringRef>,
3117               16>
3118       OrderedEntries(OffloadEntriesInfoManager.size());
3119   llvm::SmallVector<StringRef, 16> ParentFunctions(
3120       OffloadEntriesInfoManager.size());
3121 
3122   // Auxiliary methods to create metadata values and strings.
3123   auto &&GetMDInt = [this](unsigned V) {
3124     return llvm::ConstantAsMetadata::get(
3125         llvm::ConstantInt::get(CGM.Int32Ty, V));
3126   };
3127 
3128   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3129 
3130   // Create the offloading info metadata node.
3131   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3132 
3133   // Create function that emits metadata for each target region entry;
3134   auto &&TargetRegionMetadataEmitter =
3135       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3136        &GetMDString](
3137           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3138           unsigned Line,
3139           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3140         // Generate metadata for target regions. Each entry of this metadata
3141         // contains:
3142         // - Entry 0 -> Kind of this type of metadata (0).
3143         // - Entry 1 -> Device ID of the file where the entry was identified.
3144         // - Entry 2 -> File ID of the file where the entry was identified.
3145         // - Entry 3 -> Mangled name of the function where the entry was
3146         // identified.
3147         // - Entry 4 -> Line in the file where the entry was identified.
3148         // - Entry 5 -> Order the entry was created.
3149         // The first element of the metadata node is the kind.
3150         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3151                                  GetMDInt(FileID),      GetMDString(ParentName),
3152                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3153 
3154         SourceLocation Loc;
3155         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3156                   E = CGM.getContext().getSourceManager().fileinfo_end();
3157              I != E; ++I) {
3158           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3159               I->getFirst()->getUniqueID().getFile() == FileID) {
3160             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3161                 I->getFirst(), Line, 1);
3162             break;
3163           }
3164         }
3165         // Save this entry in the right position of the ordered entries array.
3166         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3167         ParentFunctions[E.getOrder()] = ParentName;
3168 
3169         // Add metadata to the named metadata node.
3170         MD->addOperand(llvm::MDNode::get(C, Ops));
3171       };
3172 
3173   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3174       TargetRegionMetadataEmitter);
3175 
3176   // Create function that emits metadata for each device global variable entry;
3177   auto &&DeviceGlobalVarMetadataEmitter =
3178       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3179        MD](StringRef MangledName,
3180            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3181                &E) {
3182         // Generate metadata for global variables. Each entry of this metadata
3183         // contains:
3184         // - Entry 0 -> Kind of this type of metadata (1).
3185         // - Entry 1 -> Mangled name of the variable.
3186         // - Entry 2 -> Declare target kind.
3187         // - Entry 3 -> Order the entry was created.
3188         // The first element of the metadata node is the kind.
3189         llvm::Metadata *Ops[] = {
3190             GetMDInt(E.getKind()), GetMDString(MangledName),
3191             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3192 
3193         // Save this entry in the right position of the ordered entries array.
3194         OrderedEntries[E.getOrder()] =
3195             std::make_tuple(&E, SourceLocation(), MangledName);
3196 
3197         // Add metadata to the named metadata node.
3198         MD->addOperand(llvm::MDNode::get(C, Ops));
3199       };
3200 
3201   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3202       DeviceGlobalVarMetadataEmitter);
3203 
3204   for (const auto &E : OrderedEntries) {
3205     assert(std::get<0>(E) && "All ordered entries must exist!");
3206     if (const auto *CE =
3207             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3208                 std::get<0>(E))) {
3209       if (!CE->getID() || !CE->getAddress()) {
3210         // Do not blame the entry if the parent funtion is not emitted.
3211         StringRef FnName = ParentFunctions[CE->getOrder()];
3212         if (!CGM.GetGlobalValue(FnName))
3213           continue;
3214         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3215             DiagnosticsEngine::Error,
3216             "Offloading entry for target region in %0 is incorrect: either the "
3217             "address or the ID is invalid.");
3218         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3219         continue;
3220       }
3221       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3222                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3223     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3224                                              OffloadEntryInfoDeviceGlobalVar>(
3225                    std::get<0>(E))) {
3226       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3227           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3228               CE->getFlags());
3229       switch (Flags) {
3230       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3231         if (CGM.getLangOpts().OpenMPIsDevice &&
3232             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3233           continue;
3234         if (!CE->getAddress()) {
3235           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3236               DiagnosticsEngine::Error, "Offloading entry for declare target "
3237                                         "variable %0 is incorrect: the "
3238                                         "address is invalid.");
3239           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3240           continue;
3241         }
3242         // The vaiable has no definition - no need to add the entry.
3243         if (CE->getVarSize().isZero())
3244           continue;
3245         break;
3246       }
3247       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3248         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3249                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3250                "Declaret target link address is set.");
3251         if (CGM.getLangOpts().OpenMPIsDevice)
3252           continue;
3253         if (!CE->getAddress()) {
3254           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3255               DiagnosticsEngine::Error,
3256               "Offloading entry for declare target variable is incorrect: the "
3257               "address is invalid.");
3258           CGM.getDiags().Report(DiagID);
3259           continue;
3260         }
3261         break;
3262       }
3263       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3264                          CE->getVarSize().getQuantity(), Flags,
3265                          CE->getLinkage());
3266     } else {
3267       llvm_unreachable("Unsupported entry kind.");
3268     }
3269   }
3270 }
3271 
3272 /// Loads all the offload entries information from the host IR
3273 /// metadata.
3274 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3275   // If we are in target mode, load the metadata from the host IR. This code has
3276   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3277 
3278   if (!CGM.getLangOpts().OpenMPIsDevice)
3279     return;
3280 
3281   if (CGM.getLangOpts().OMPHostIRFile.empty())
3282     return;
3283 
3284   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3285   if (auto EC = Buf.getError()) {
3286     CGM.getDiags().Report(diag::err_cannot_open_file)
3287         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3288     return;
3289   }
3290 
3291   llvm::LLVMContext C;
3292   auto ME = expectedToErrorOrAndEmitErrors(
3293       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3294 
3295   if (auto EC = ME.getError()) {
3296     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3297         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3298     CGM.getDiags().Report(DiagID)
3299         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3300     return;
3301   }
3302 
3303   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3304   if (!MD)
3305     return;
3306 
3307   for (llvm::MDNode *MN : MD->operands()) {
3308     auto &&GetMDInt = [MN](unsigned Idx) {
3309       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3310       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3311     };
3312 
3313     auto &&GetMDString = [MN](unsigned Idx) {
3314       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3315       return V->getString();
3316     };
3317 
3318     switch (GetMDInt(0)) {
3319     default:
3320       llvm_unreachable("Unexpected metadata!");
3321       break;
3322     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3323         OffloadingEntryInfoTargetRegion:
3324       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3325           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3326           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3327           /*Order=*/GetMDInt(5));
3328       break;
3329     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3330         OffloadingEntryInfoDeviceGlobalVar:
3331       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3332           /*MangledName=*/GetMDString(1),
3333           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3334               /*Flags=*/GetMDInt(2)),
3335           /*Order=*/GetMDInt(3));
3336       break;
3337     }
3338   }
3339 }
3340 
3341 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3342   if (!KmpRoutineEntryPtrTy) {
3343     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3344     ASTContext &C = CGM.getContext();
3345     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3346     FunctionProtoType::ExtProtoInfo EPI;
3347     KmpRoutineEntryPtrQTy = C.getPointerType(
3348         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3349     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3350   }
3351 }
3352 
3353 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3354   // Make sure the type of the entry is already created. This is the type we
3355   // have to create:
3356   // struct __tgt_offload_entry{
3357   //   void      *addr;       // Pointer to the offload entry info.
3358   //                          // (function or global)
3359   //   char      *name;       // Name of the function or global.
3360   //   size_t     size;       // Size of the entry info (0 if it a function).
3361   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3362   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3363   // };
3364   if (TgtOffloadEntryQTy.isNull()) {
3365     ASTContext &C = CGM.getContext();
3366     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3367     RD->startDefinition();
3368     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3369     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3370     addFieldToRecordDecl(C, RD, C.getSizeType());
3371     addFieldToRecordDecl(
3372         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3373     addFieldToRecordDecl(
3374         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3375     RD->completeDefinition();
3376     RD->addAttr(PackedAttr::CreateImplicit(C));
3377     TgtOffloadEntryQTy = C.getRecordType(RD);
3378   }
3379   return TgtOffloadEntryQTy;
3380 }
3381 
3382 namespace {
3383 struct PrivateHelpersTy {
3384   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3385                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3386       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3387         PrivateElemInit(PrivateElemInit) {}
3388   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3389   const Expr *OriginalRef = nullptr;
3390   const VarDecl *Original = nullptr;
3391   const VarDecl *PrivateCopy = nullptr;
3392   const VarDecl *PrivateElemInit = nullptr;
3393   bool isLocalPrivate() const {
3394     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3395   }
3396 };
3397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3398 } // anonymous namespace
3399 
3400 static bool isAllocatableDecl(const VarDecl *VD) {
3401   const VarDecl *CVD = VD->getCanonicalDecl();
3402   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3403     return false;
3404   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3405   // Use the default allocation.
3406   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3407             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3408            !AA->getAllocator());
3409 }
3410 
3411 static RecordDecl *
3412 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3413   if (!Privates.empty()) {
3414     ASTContext &C = CGM.getContext();
3415     // Build struct .kmp_privates_t. {
3416     //         /*  private vars  */
3417     //       };
3418     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3419     RD->startDefinition();
3420     for (const auto &Pair : Privates) {
3421       const VarDecl *VD = Pair.second.Original;
3422       QualType Type = VD->getType().getNonReferenceType();
3423       // If the private variable is a local variable with lvalue ref type,
3424       // allocate the pointer instead of the pointee type.
3425       if (Pair.second.isLocalPrivate()) {
3426         if (VD->getType()->isLValueReferenceType())
3427           Type = C.getPointerType(Type);
3428         if (isAllocatableDecl(VD))
3429           Type = C.getPointerType(Type);
3430       }
3431       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3432       if (VD->hasAttrs()) {
3433         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3434              E(VD->getAttrs().end());
3435              I != E; ++I)
3436           FD->addAttr(*I);
3437       }
3438     }
3439     RD->completeDefinition();
3440     return RD;
3441   }
3442   return nullptr;
3443 }
3444 
3445 static RecordDecl *
3446 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3447                          QualType KmpInt32Ty,
3448                          QualType KmpRoutineEntryPointerQTy) {
3449   ASTContext &C = CGM.getContext();
3450   // Build struct kmp_task_t {
3451   //         void *              shareds;
3452   //         kmp_routine_entry_t routine;
3453   //         kmp_int32           part_id;
3454   //         kmp_cmplrdata_t data1;
3455   //         kmp_cmplrdata_t data2;
3456   // For taskloops additional fields:
3457   //         kmp_uint64          lb;
3458   //         kmp_uint64          ub;
3459   //         kmp_int64           st;
3460   //         kmp_int32           liter;
3461   //         void *              reductions;
3462   //       };
3463   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3464   UD->startDefinition();
3465   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3466   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3467   UD->completeDefinition();
3468   QualType KmpCmplrdataTy = C.getRecordType(UD);
3469   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3470   RD->startDefinition();
3471   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3472   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3473   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3474   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3475   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3476   if (isOpenMPTaskLoopDirective(Kind)) {
3477     QualType KmpUInt64Ty =
3478         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3479     QualType KmpInt64Ty =
3480         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3481     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3482     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3483     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3484     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3485     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3486   }
3487   RD->completeDefinition();
3488   return RD;
3489 }
3490 
3491 static RecordDecl *
3492 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3493                                      ArrayRef<PrivateDataTy> Privates) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t_with_privates {
3496   //         kmp_task_t task_data;
3497   //         .kmp_privates_t. privates;
3498   //       };
3499   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3500   RD->startDefinition();
3501   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3502   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3503     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3504   RD->completeDefinition();
3505   return RD;
3506 }
3507 
3508 /// Emit a proxy function which accepts kmp_task_t as the second
3509 /// argument.
3510 /// \code
3511 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3512 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3513 ///   For taskloops:
3514 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3515 ///   tt->reductions, tt->shareds);
3516 ///   return 0;
3517 /// }
3518 /// \endcode
3519 static llvm::Function *
3520 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3521                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3522                       QualType KmpTaskTWithPrivatesPtrQTy,
3523                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3524                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3525                       llvm::Value *TaskPrivatesMap) {
3526   ASTContext &C = CGM.getContext();
3527   FunctionArgList Args;
3528   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3529                             ImplicitParamDecl::Other);
3530   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3531                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3532                                 ImplicitParamDecl::Other);
3533   Args.push_back(&GtidArg);
3534   Args.push_back(&TaskTypeArg);
3535   const auto &TaskEntryFnInfo =
3536       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3537   llvm::FunctionType *TaskEntryTy =
3538       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3539   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3540   auto *TaskEntry = llvm::Function::Create(
3541       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3542   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3543   TaskEntry->setDoesNotRecurse();
3544   CodeGenFunction CGF(CGM);
3545   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3546                     Loc, Loc);
3547 
3548   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3549   // tt,
3550   // For taskloops:
3551   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3552   // tt->task_data.shareds);
3553   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3554       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3555   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3556       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3557       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3558   const auto *KmpTaskTWithPrivatesQTyRD =
3559       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3560   LValue Base =
3561       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3562   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3563   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3564   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3565   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3566 
3567   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3568   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3569   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3570       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3571       CGF.ConvertTypeForMem(SharedsPtrTy));
3572 
3573   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3574   llvm::Value *PrivatesParam;
3575   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3576     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3577     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3578         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3579   } else {
3580     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3581   }
3582 
3583   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3584                                TaskPrivatesMap,
3585                                CGF.Builder
3586                                    .CreatePointerBitCastOrAddrSpaceCast(
3587                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3588                                    .getPointer()};
3589   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3590                                           std::end(CommonArgs));
3591   if (isOpenMPTaskLoopDirective(Kind)) {
3592     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3593     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3594     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3595     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3596     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3597     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3598     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3599     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3600     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3601     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3602     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3603     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3604     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3605     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3606     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3607     CallArgs.push_back(LBParam);
3608     CallArgs.push_back(UBParam);
3609     CallArgs.push_back(StParam);
3610     CallArgs.push_back(LIParam);
3611     CallArgs.push_back(RParam);
3612   }
3613   CallArgs.push_back(SharedsParam);
3614 
3615   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3616                                                   CallArgs);
3617   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3618                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3619   CGF.FinishFunction();
3620   return TaskEntry;
3621 }
3622 
3623 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3624                                             SourceLocation Loc,
3625                                             QualType KmpInt32Ty,
3626                                             QualType KmpTaskTWithPrivatesPtrQTy,
3627                                             QualType KmpTaskTWithPrivatesQTy) {
3628   ASTContext &C = CGM.getContext();
3629   FunctionArgList Args;
3630   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3631                             ImplicitParamDecl::Other);
3632   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3633                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3634                                 ImplicitParamDecl::Other);
3635   Args.push_back(&GtidArg);
3636   Args.push_back(&TaskTypeArg);
3637   const auto &DestructorFnInfo =
3638       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3639   llvm::FunctionType *DestructorFnTy =
3640       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3641   std::string Name =
3642       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3643   auto *DestructorFn =
3644       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3645                              Name, &CGM.getModule());
3646   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3647                                     DestructorFnInfo);
3648   DestructorFn->setDoesNotRecurse();
3649   CodeGenFunction CGF(CGM);
3650   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3651                     Args, Loc, Loc);
3652 
3653   LValue Base = CGF.EmitLoadOfPointerLValue(
3654       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3655       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3656   const auto *KmpTaskTWithPrivatesQTyRD =
3657       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3658   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3659   Base = CGF.EmitLValueForField(Base, *FI);
3660   for (const auto *Field :
3661        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3662     if (QualType::DestructionKind DtorKind =
3663             Field->getType().isDestructedType()) {
3664       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3665       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3666     }
3667   }
3668   CGF.FinishFunction();
3669   return DestructorFn;
3670 }
3671 
3672 /// Emit a privates mapping function for correct handling of private and
3673 /// firstprivate variables.
3674 /// \code
3675 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3676 /// **noalias priv1,...,  <tyn> **noalias privn) {
3677 ///   *priv1 = &.privates.priv1;
3678 ///   ...;
3679 ///   *privn = &.privates.privn;
3680 /// }
3681 /// \endcode
3682 static llvm::Value *
3683 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3684                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3685                                ArrayRef<PrivateDataTy> Privates) {
3686   ASTContext &C = CGM.getContext();
3687   FunctionArgList Args;
3688   ImplicitParamDecl TaskPrivatesArg(
3689       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3690       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3691       ImplicitParamDecl::Other);
3692   Args.push_back(&TaskPrivatesArg);
3693   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3694   unsigned Counter = 1;
3695   for (const Expr *E : Data.PrivateVars) {
3696     Args.push_back(ImplicitParamDecl::Create(
3697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3698         C.getPointerType(C.getPointerType(E->getType()))
3699             .withConst()
3700             .withRestrict(),
3701         ImplicitParamDecl::Other));
3702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3703     PrivateVarsPos[VD] = Counter;
3704     ++Counter;
3705   }
3706   for (const Expr *E : Data.FirstprivateVars) {
3707     Args.push_back(ImplicitParamDecl::Create(
3708         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3709         C.getPointerType(C.getPointerType(E->getType()))
3710             .withConst()
3711             .withRestrict(),
3712         ImplicitParamDecl::Other));
3713     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   for (const Expr *E : Data.LastprivateVars) {
3718     Args.push_back(ImplicitParamDecl::Create(
3719         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3720         C.getPointerType(C.getPointerType(E->getType()))
3721             .withConst()
3722             .withRestrict(),
3723         ImplicitParamDecl::Other));
3724     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3725     PrivateVarsPos[VD] = Counter;
3726     ++Counter;
3727   }
3728   for (const VarDecl *VD : Data.PrivateLocals) {
3729     QualType Ty = VD->getType().getNonReferenceType();
3730     if (VD->getType()->isLValueReferenceType())
3731       Ty = C.getPointerType(Ty);
3732     if (isAllocatableDecl(VD))
3733       Ty = C.getPointerType(Ty);
3734     Args.push_back(ImplicitParamDecl::Create(
3735         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3736         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3737         ImplicitParamDecl::Other));
3738     PrivateVarsPos[VD] = Counter;
3739     ++Counter;
3740   }
3741   const auto &TaskPrivatesMapFnInfo =
3742       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3743   llvm::FunctionType *TaskPrivatesMapTy =
3744       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3745   std::string Name =
3746       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3747   auto *TaskPrivatesMap = llvm::Function::Create(
3748       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3749       &CGM.getModule());
3750   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3751                                     TaskPrivatesMapFnInfo);
3752   if (CGM.getLangOpts().Optimize) {
3753     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3754     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3755     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3756   }
3757   CodeGenFunction CGF(CGM);
3758   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3759                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3760 
3761   // *privi = &.privates.privi;
3762   LValue Base = CGF.EmitLoadOfPointerLValue(
3763       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3764       TaskPrivatesArg.getType()->castAs<PointerType>());
3765   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3766   Counter = 0;
3767   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3768     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3769     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3770     LValue RefLVal =
3771         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3772     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3773         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3774     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3775     ++Counter;
3776   }
3777   CGF.FinishFunction();
3778   return TaskPrivatesMap;
3779 }
3780 
3781 /// Emit initialization for private variables in task-based directives.
3782 static void emitPrivatesInit(CodeGenFunction &CGF,
3783                              const OMPExecutableDirective &D,
3784                              Address KmpTaskSharedsPtr, LValue TDBase,
3785                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3786                              QualType SharedsTy, QualType SharedsPtrTy,
3787                              const OMPTaskDataTy &Data,
3788                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3789   ASTContext &C = CGF.getContext();
3790   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3791   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3792   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3793                                  ? OMPD_taskloop
3794                                  : OMPD_task;
3795   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3796   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3797   LValue SrcBase;
3798   bool IsTargetTask =
3799       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3800       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3801   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3802   // PointersArray, SizesArray, and MappersArray. The original variables for
3803   // these arrays are not captured and we get their addresses explicitly.
3804   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3805       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3806     SrcBase = CGF.MakeAddrLValue(
3807         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3808             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3809         SharedsTy);
3810   }
3811   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3812   for (const PrivateDataTy &Pair : Privates) {
3813     // Do not initialize private locals.
3814     if (Pair.second.isLocalPrivate()) {
3815       ++FI;
3816       continue;
3817     }
3818     const VarDecl *VD = Pair.second.PrivateCopy;
3819     const Expr *Init = VD->getAnyInitializer();
3820     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3821                              !CGF.isTrivialInitializer(Init)))) {
3822       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3823       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3824         const VarDecl *OriginalVD = Pair.second.Original;
3825         // Check if the variable is the target-based BasePointersArray,
3826         // PointersArray, SizesArray, or MappersArray.
3827         LValue SharedRefLValue;
3828         QualType Type = PrivateLValue.getType();
3829         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3830         if (IsTargetTask && !SharedField) {
3831           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3832                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3833                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3834                          ->getNumParams() == 0 &&
3835                  isa<TranslationUnitDecl>(
3836                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3837                          ->getDeclContext()) &&
3838                  "Expected artificial target data variable.");
3839           SharedRefLValue =
3840               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3841         } else if (ForDup) {
3842           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3843           SharedRefLValue = CGF.MakeAddrLValue(
3844               Address(SharedRefLValue.getPointer(CGF),
3845                       C.getDeclAlign(OriginalVD)),
3846               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3847               SharedRefLValue.getTBAAInfo());
3848         } else if (CGF.LambdaCaptureFields.count(
3849                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3850                    dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3851           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3852         } else {
3853           // Processing for implicitly captured variables.
3854           InlinedOpenMPRegionRAII Region(
3855               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3856               /*HasCancel=*/false);
3857           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3858         }
3859         if (Type->isArrayType()) {
3860           // Initialize firstprivate array.
3861           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3862             // Perform simple memcpy.
3863             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3864           } else {
3865             // Initialize firstprivate array using element-by-element
3866             // initialization.
3867             CGF.EmitOMPAggregateAssign(
3868                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3869                 Type,
3870                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3871                                                   Address SrcElement) {
3872                   // Clean up any temporaries needed by the initialization.
3873                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3874                   InitScope.addPrivate(
3875                       Elem, [SrcElement]() -> Address { return SrcElement; });
3876                   (void)InitScope.Privatize();
3877                   // Emit initialization for single element.
3878                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3879                       CGF, &CapturesInfo);
3880                   CGF.EmitAnyExprToMem(Init, DestElement,
3881                                        Init->getType().getQualifiers(),
3882                                        /*IsInitializer=*/false);
3883                 });
3884           }
3885         } else {
3886           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3887           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3888             return SharedRefLValue.getAddress(CGF);
3889           });
3890           (void)InitScope.Privatize();
3891           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3892           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3893                              /*capturedByInit=*/false);
3894         }
3895       } else {
3896         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3897       }
3898     }
3899     ++FI;
3900   }
3901 }
3902 
3903 /// Check if duplication function is required for taskloops.
3904 static bool checkInitIsRequired(CodeGenFunction &CGF,
3905                                 ArrayRef<PrivateDataTy> Privates) {
3906   bool InitRequired = false;
3907   for (const PrivateDataTy &Pair : Privates) {
3908     if (Pair.second.isLocalPrivate())
3909       continue;
3910     const VarDecl *VD = Pair.second.PrivateCopy;
3911     const Expr *Init = VD->getAnyInitializer();
3912     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3913                                     !CGF.isTrivialInitializer(Init));
3914     if (InitRequired)
3915       break;
3916   }
3917   return InitRequired;
3918 }
3919 
3920 
3921 /// Emit task_dup function (for initialization of
3922 /// private/firstprivate/lastprivate vars and last_iter flag)
3923 /// \code
3924 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3925 /// lastpriv) {
3926 /// // setup lastprivate flag
3927 ///    task_dst->last = lastpriv;
3928 /// // could be constructor calls here...
3929 /// }
3930 /// \endcode
3931 static llvm::Value *
3932 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3933                     const OMPExecutableDirective &D,
3934                     QualType KmpTaskTWithPrivatesPtrQTy,
3935                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3936                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3937                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3938                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3939   ASTContext &C = CGM.getContext();
3940   FunctionArgList Args;
3941   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3942                            KmpTaskTWithPrivatesPtrQTy,
3943                            ImplicitParamDecl::Other);
3944   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3945                            KmpTaskTWithPrivatesPtrQTy,
3946                            ImplicitParamDecl::Other);
3947   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3948                                 ImplicitParamDecl::Other);
3949   Args.push_back(&DstArg);
3950   Args.push_back(&SrcArg);
3951   Args.push_back(&LastprivArg);
3952   const auto &TaskDupFnInfo =
3953       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3954   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3955   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3956   auto *TaskDup = llvm::Function::Create(
3957       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3958   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3959   TaskDup->setDoesNotRecurse();
3960   CodeGenFunction CGF(CGM);
3961   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3962                     Loc);
3963 
3964   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3965       CGF.GetAddrOfLocalVar(&DstArg),
3966       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3967   // task_dst->liter = lastpriv;
3968   if (WithLastIter) {
3969     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3970     LValue Base = CGF.EmitLValueForField(
3971         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3972     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3973     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3974         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3975     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3976   }
3977 
3978   // Emit initial values for private copies (if any).
3979   assert(!Privates.empty());
3980   Address KmpTaskSharedsPtr = Address::invalid();
3981   if (!Data.FirstprivateVars.empty()) {
3982     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3983         CGF.GetAddrOfLocalVar(&SrcArg),
3984         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3985     LValue Base = CGF.EmitLValueForField(
3986         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3987     KmpTaskSharedsPtr = Address(
3988         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3989                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3990                                                   KmpTaskTShareds)),
3991                              Loc),
3992         CGM.getNaturalTypeAlignment(SharedsTy));
3993   }
3994   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3995                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3996   CGF.FinishFunction();
3997   return TaskDup;
3998 }
3999 
4000 /// Checks if destructor function is required to be generated.
4001 /// \return true if cleanups are required, false otherwise.
4002 static bool
4003 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4004                          ArrayRef<PrivateDataTy> Privates) {
4005   for (const PrivateDataTy &P : Privates) {
4006     if (P.second.isLocalPrivate())
4007       continue;
4008     QualType Ty = P.second.Original->getType().getNonReferenceType();
4009     if (Ty.isDestructedType())
4010       return true;
4011   }
4012   return false;
4013 }
4014 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor privatizes the iterator variables and their counters and
/// emits, for every iterator of the expression, the head of a loop
/// (counter initialization, "iter.cont" condition block and "iter.body"
/// block). Code emitted while the object is alive therefore lands in the body
/// of the innermost loop. The destructor emits the counter increment, the
/// back-branch and the "iter.exit" block of every loop, innermost first.
/// When constructed with a null expression the object emits nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; null means "no loops".
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the loop condition blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the loop exit blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop heads for all iterators of \p E (if any) into \p CGF.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // The upper bound is evaluated here, before the variables are
      // privatized and before any loop block is emitted.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      // Private storage for the iterator variable itself.
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      // Private storage for the helper counter driving the loop.
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop heads, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the type of the counter variable.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost loop (I == 1) is emitted as
      // finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4093 
4094 static std::pair<llvm::Value *, llvm::Value *>
4095 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4096   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4097   llvm::Value *Addr;
4098   if (OASE) {
4099     const Expr *Base = OASE->getBase();
4100     Addr = CGF.EmitScalarExpr(Base);
4101   } else {
4102     Addr = CGF.EmitLValue(E).getPointer(CGF);
4103   }
4104   llvm::Value *SizeVal;
4105   QualType Ty = E->getType();
4106   if (OASE) {
4107     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4108     for (const Expr *SE : OASE->getDimensions()) {
4109       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4110       Sz = CGF.EmitScalarConversion(
4111           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4112       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4113     }
4114   } else if (const auto *ASE =
4115                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4116     LValue UpAddrLVal =
4117         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4118     llvm::Value *UpAddr =
4119         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4120     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4121     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4122     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4123   } else {
4124     SizeVal = CGF.getTypeSize(Ty);
4125   }
4126   return std::make_pair(Addr, SizeVal);
4127 }
4128 
4129 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4130 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4131   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4132   if (KmpTaskAffinityInfoTy.isNull()) {
4133     RecordDecl *KmpAffinityInfoRD =
4134         C.buildImplicitRecord("kmp_task_affinity_info_t");
4135     KmpAffinityInfoRD->startDefinition();
4136     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4137     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4138     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4139     KmpAffinityInfoRD->completeDefinition();
4140     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4141   }
4142 }
4143 
4144 CGOpenMPRuntime::TaskResultTy
4145 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4146                               const OMPExecutableDirective &D,
4147                               llvm::Function *TaskFunction, QualType SharedsTy,
4148                               Address Shareds, const OMPTaskDataTy &Data) {
4149   ASTContext &C = CGM.getContext();
4150   llvm::SmallVector<PrivateDataTy, 4> Privates;
4151   // Aggregate privates and sort them by the alignment.
4152   const auto *I = Data.PrivateCopies.begin();
4153   for (const Expr *E : Data.PrivateVars) {
4154     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4155     Privates.emplace_back(
4156         C.getDeclAlign(VD),
4157         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4158                          /*PrivateElemInit=*/nullptr));
4159     ++I;
4160   }
4161   I = Data.FirstprivateCopies.begin();
4162   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4163   for (const Expr *E : Data.FirstprivateVars) {
4164     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4165     Privates.emplace_back(
4166         C.getDeclAlign(VD),
4167         PrivateHelpersTy(
4168             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4169             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4170     ++I;
4171     ++IElemInitRef;
4172   }
4173   I = Data.LastprivateCopies.begin();
4174   for (const Expr *E : Data.LastprivateVars) {
4175     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4176     Privates.emplace_back(
4177         C.getDeclAlign(VD),
4178         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4179                          /*PrivateElemInit=*/nullptr));
4180     ++I;
4181   }
4182   for (const VarDecl *VD : Data.PrivateLocals) {
4183     if (isAllocatableDecl(VD))
4184       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4185     else
4186       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4187   }
4188   llvm::stable_sort(Privates,
4189                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4190                       return L.first > R.first;
4191                     });
4192   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4193   // Build type kmp_routine_entry_t (if not built yet).
4194   emitKmpRoutineEntryT(KmpInt32Ty);
4195   // Build type kmp_task_t (if not built yet).
4196   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4197     if (SavedKmpTaskloopTQTy.isNull()) {
4198       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4199           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4200     }
4201     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4202   } else {
4203     assert((D.getDirectiveKind() == OMPD_task ||
4204             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4205             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4206            "Expected taskloop, task or target directive");
4207     if (SavedKmpTaskTQTy.isNull()) {
4208       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4209           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4210     }
4211     KmpTaskTQTy = SavedKmpTaskTQTy;
4212   }
4213   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4214   // Build particular struct kmp_task_t for the given task.
4215   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4216       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4217   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4218   QualType KmpTaskTWithPrivatesPtrQTy =
4219       C.getPointerType(KmpTaskTWithPrivatesQTy);
4220   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4221   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4222       KmpTaskTWithPrivatesTy->getPointerTo();
4223   llvm::Value *KmpTaskTWithPrivatesTySize =
4224       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4225   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4226 
4227   // Emit initial values for private copies (if any).
4228   llvm::Value *TaskPrivatesMap = nullptr;
4229   llvm::Type *TaskPrivatesMapTy =
4230       std::next(TaskFunction->arg_begin(), 3)->getType();
4231   if (!Privates.empty()) {
4232     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4233     TaskPrivatesMap =
4234         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4235     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4236         TaskPrivatesMap, TaskPrivatesMapTy);
4237   } else {
4238     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4239         cast<llvm::PointerType>(TaskPrivatesMapTy));
4240   }
4241   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4242   // kmp_task_t *tt);
4243   llvm::Function *TaskEntry = emitProxyTaskFunction(
4244       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4245       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4246       TaskPrivatesMap);
4247 
4248   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4249   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4250   // kmp_routine_entry_t *task_entry);
4251   // Task flags. Format is taken from
4252   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4253   // description of kmp_tasking_flags struct.
4254   enum {
4255     TiedFlag = 0x1,
4256     FinalFlag = 0x2,
4257     DestructorsFlag = 0x8,
4258     PriorityFlag = 0x20,
4259     DetachableFlag = 0x40,
4260   };
4261   unsigned Flags = Data.Tied ? TiedFlag : 0;
4262   bool NeedsCleanup = false;
4263   if (!Privates.empty()) {
4264     NeedsCleanup =
4265         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4266     if (NeedsCleanup)
4267       Flags = Flags | DestructorsFlag;
4268   }
4269   if (Data.Priority.getInt())
4270     Flags = Flags | PriorityFlag;
4271   if (D.hasClausesOfKind<OMPDetachClause>())
4272     Flags = Flags | DetachableFlag;
4273   llvm::Value *TaskFlags =
4274       Data.Final.getPointer()
4275           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4276                                      CGF.Builder.getInt32(FinalFlag),
4277                                      CGF.Builder.getInt32(/*C=*/0))
4278           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4279   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4280   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4281   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4282       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4283       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4284           TaskEntry, KmpRoutineEntryPtrTy)};
4285   llvm::Value *NewTask;
4286   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4287     // Check if we have any device clause associated with the directive.
4288     const Expr *Device = nullptr;
4289     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4290       Device = C->getDevice();
4291     // Emit device ID if any otherwise use default value.
4292     llvm::Value *DeviceID;
4293     if (Device)
4294       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4295                                            CGF.Int64Ty, /*isSigned=*/true);
4296     else
4297       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4298     AllocArgs.push_back(DeviceID);
4299     NewTask = CGF.EmitRuntimeCall(
4300         OMPBuilder.getOrCreateRuntimeFunction(
4301             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4302         AllocArgs);
4303   } else {
4304     NewTask =
4305         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4306                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4307                             AllocArgs);
4308   }
4309   // Emit detach clause initialization.
4310   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4311   // task_descriptor);
4312   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4313     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4314     LValue EvtLVal = CGF.EmitLValue(Evt);
4315 
4316     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4317     // int gtid, kmp_task_t *task);
4318     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4319     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4320     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4321     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4322         OMPBuilder.getOrCreateRuntimeFunction(
4323             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4324         {Loc, Tid, NewTask});
4325     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4326                                       Evt->getExprLoc());
4327     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4328   }
4329   // Process affinity clauses.
4330   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4331     // Process list of affinity data.
4332     ASTContext &C = CGM.getContext();
4333     Address AffinitiesArray = Address::invalid();
4334     // Calculate number of elements to form the array of affinity data.
4335     llvm::Value *NumOfElements = nullptr;
4336     unsigned NumAffinities = 0;
4337     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4338       if (const Expr *Modifier = C->getModifier()) {
4339         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4340         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4341           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4342           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4343           NumOfElements =
4344               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4345         }
4346       } else {
4347         NumAffinities += C->varlist_size();
4348       }
4349     }
4350     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4351     // Fields ids in kmp_task_affinity_info record.
4352     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4353 
4354     QualType KmpTaskAffinityInfoArrayTy;
4355     if (NumOfElements) {
4356       NumOfElements = CGF.Builder.CreateNUWAdd(
4357           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4358       OpaqueValueExpr OVE(
4359           Loc,
4360           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4361           VK_RValue);
4362       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4363                                                     RValue::get(NumOfElements));
4364       KmpTaskAffinityInfoArrayTy =
4365           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4366                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4367       // Properly emit variable-sized array.
4368       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4369                                            ImplicitParamDecl::Other);
4370       CGF.EmitVarDecl(*PD);
4371       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4372       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4373                                                 /*isSigned=*/false);
4374     } else {
4375       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4376           KmpTaskAffinityInfoTy,
4377           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4378           ArrayType::Normal, /*IndexTypeQuals=*/0);
4379       AffinitiesArray =
4380           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4381       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4382       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4383                                              /*isSigned=*/false);
4384     }
4385 
4386     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4387     // Fill array by elements without iterators.
4388     unsigned Pos = 0;
4389     bool HasIterator = false;
4390     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4391       if (C->getModifier()) {
4392         HasIterator = true;
4393         continue;
4394       }
4395       for (const Expr *E : C->varlists()) {
4396         llvm::Value *Addr;
4397         llvm::Value *Size;
4398         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4399         LValue Base =
4400             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4401                                KmpTaskAffinityInfoTy);
4402         // affs[i].base_addr = &<Affinities[i].second>;
4403         LValue BaseAddrLVal = CGF.EmitLValueForField(
4404             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4405         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4406                               BaseAddrLVal);
4407         // affs[i].len = sizeof(<Affinities[i].second>);
4408         LValue LenLVal = CGF.EmitLValueForField(
4409             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4410         CGF.EmitStoreOfScalar(Size, LenLVal);
4411         ++Pos;
4412       }
4413     }
4414     LValue PosLVal;
4415     if (HasIterator) {
4416       PosLVal = CGF.MakeAddrLValue(
4417           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4418           C.getSizeType());
4419       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4420     }
4421     // Process elements with iterators.
4422     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4423       const Expr *Modifier = C->getModifier();
4424       if (!Modifier)
4425         continue;
4426       OMPIteratorGeneratorScope IteratorScope(
4427           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4428       for (const Expr *E : C->varlists()) {
4429         llvm::Value *Addr;
4430         llvm::Value *Size;
4431         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4432         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4433         LValue Base = CGF.MakeAddrLValue(
4434             Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4435                     AffinitiesArray.getAlignment()),
4436             KmpTaskAffinityInfoTy);
4437         // affs[i].base_addr = &<Affinities[i].second>;
4438         LValue BaseAddrLVal = CGF.EmitLValueForField(
4439             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4440         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4441                               BaseAddrLVal);
4442         // affs[i].len = sizeof(<Affinities[i].second>);
4443         LValue LenLVal = CGF.EmitLValueForField(
4444             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4445         CGF.EmitStoreOfScalar(Size, LenLVal);
4446         Idx = CGF.Builder.CreateNUWAdd(
4447             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4448         CGF.EmitStoreOfScalar(Idx, PosLVal);
4449       }
4450     }
4451     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4452     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4453     // naffins, kmp_task_affinity_info_t *affin_list);
4454     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4455     llvm::Value *GTid = getThreadID(CGF, Loc);
4456     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4457         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4458     // FIXME: Emit the function and ignore its result for now unless the
4459     // runtime function is properly implemented.
4460     (void)CGF.EmitRuntimeCall(
4461         OMPBuilder.getOrCreateRuntimeFunction(
4462             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4463         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4464   }
4465   llvm::Value *NewTaskNewTaskTTy =
4466       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4467           NewTask, KmpTaskTWithPrivatesPtrTy);
4468   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4469                                                KmpTaskTWithPrivatesQTy);
4470   LValue TDBase =
4471       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4472   // Fill the data in the resulting kmp_task_t record.
4473   // Copy shareds if there are any.
4474   Address KmpTaskSharedsPtr = Address::invalid();
4475   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4476     KmpTaskSharedsPtr =
4477         Address(CGF.EmitLoadOfScalar(
4478                     CGF.EmitLValueForField(
4479                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4480                                            KmpTaskTShareds)),
4481                     Loc),
4482                 CGM.getNaturalTypeAlignment(SharedsTy));
4483     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4484     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4485     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4486   }
4487   // Emit initial values for private copies (if any).
4488   TaskResultTy Result;
4489   if (!Privates.empty()) {
4490     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4491                      SharedsTy, SharedsPtrTy, Data, Privates,
4492                      /*ForDup=*/false);
4493     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4494         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4495       Result.TaskDupFn = emitTaskDupFunction(
4496           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4497           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4498           /*WithLastIter=*/!Data.LastprivateVars.empty());
4499     }
4500   }
4501   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4502   enum { Priority = 0, Destructors = 1 };
4503   // Provide pointer to function with destructors for privates.
4504   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4505   const RecordDecl *KmpCmplrdataUD =
4506       (*FI)->getType()->getAsUnionType()->getDecl();
4507   if (NeedsCleanup) {
4508     llvm::Value *DestructorFn = emitDestructorsFunction(
4509         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4510         KmpTaskTWithPrivatesQTy);
4511     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4512     LValue DestructorsLV = CGF.EmitLValueForField(
4513         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4514     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4515                               DestructorFn, KmpRoutineEntryPtrTy),
4516                           DestructorsLV);
4517   }
4518   // Set priority.
4519   if (Data.Priority.getInt()) {
4520     LValue Data2LV = CGF.EmitLValueForField(
4521         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4522     LValue PriorityLV = CGF.EmitLValueForField(
4523         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4524     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4525   }
4526   Result.NewTask = NewTask;
4527   Result.TaskEntry = TaskEntry;
4528   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4529   Result.TDBase = TDBase;
4530   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4531   return Result;
4532 }
4533 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Indices of the fields of the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy { BaseAddr = 0, Len = 1, Flags = 2 };
} // namespace
4544 
4545 /// Translates internal dependency kind into the runtime kind.
4546 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4547   RTLDependenceKindTy DepKind;
4548   switch (K) {
4549   case OMPC_DEPEND_in:
4550     DepKind = DepIn;
4551     break;
4552   // Out and InOut dependencies must use the same code.
4553   case OMPC_DEPEND_out:
4554   case OMPC_DEPEND_inout:
4555     DepKind = DepInOut;
4556     break;
4557   case OMPC_DEPEND_mutexinoutset:
4558     DepKind = DepMutexInOutSet;
4559     break;
4560   case OMPC_DEPEND_source:
4561   case OMPC_DEPEND_sink:
4562   case OMPC_DEPEND_depobj:
4563   case OMPC_DEPEND_unknown:
4564     llvm_unreachable("Unknown task dependence type");
4565   }
4566   return DepKind;
4567 }
4568 
4569 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4570 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4571                            QualType &FlagsTy) {
4572   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4573   if (KmpDependInfoTy.isNull()) {
4574     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4575     KmpDependInfoRD->startDefinition();
4576     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4577     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4578     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4579     KmpDependInfoRD->completeDefinition();
4580     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4581   }
4582 }
4583 
4584 std::pair<llvm::Value *, LValue>
4585 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4586                                    SourceLocation Loc) {
4587   ASTContext &C = CGM.getContext();
4588   QualType FlagsTy;
4589   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4590   RecordDecl *KmpDependInfoRD =
4591       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4592   LValue Base = CGF.EmitLoadOfPointerLValue(
4593       DepobjLVal.getAddress(CGF),
4594       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4595   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4596   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4597           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4598   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4599                             Base.getTBAAInfo());
4600   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4601       Addr.getPointer(),
4602       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4603   LValue NumDepsBase = CGF.MakeAddrLValue(
4604       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4605       Base.getBaseInfo(), Base.getTBAAInfo());
4606   // NumDeps = deps[i].base_addr;
4607   LValue BaseAddrLVal = CGF.EmitLValueForField(
4608       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4609   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4610   return std::make_pair(NumDeps, Base);
4611 }
4612 
4613 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4614                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4615                            const OMPTaskDataTy::DependData &Data,
4616                            Address DependenciesArray) {
4617   CodeGenModule &CGM = CGF.CGM;
4618   ASTContext &C = CGM.getContext();
4619   QualType FlagsTy;
4620   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4621   RecordDecl *KmpDependInfoRD =
4622       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4623   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4624 
4625   OMPIteratorGeneratorScope IteratorScope(
4626       CGF, cast_or_null<OMPIteratorExpr>(
4627                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4628                                  : nullptr));
4629   for (const Expr *E : Data.DepExprs) {
4630     llvm::Value *Addr;
4631     llvm::Value *Size;
4632     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4633     LValue Base;
4634     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4635       Base = CGF.MakeAddrLValue(
4636           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4637     } else {
4638       LValue &PosLVal = *Pos.get<LValue *>();
4639       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4640       Base = CGF.MakeAddrLValue(
4641           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4642                   DependenciesArray.getAlignment()),
4643           KmpDependInfoTy);
4644     }
4645     // deps[i].base_addr = &<Dependencies[i].second>;
4646     LValue BaseAddrLVal = CGF.EmitLValueForField(
4647         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4648     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4649                           BaseAddrLVal);
4650     // deps[i].len = sizeof(<Dependencies[i].second>);
4651     LValue LenLVal = CGF.EmitLValueForField(
4652         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4653     CGF.EmitStoreOfScalar(Size, LenLVal);
4654     // deps[i].flags = <Dependencies[i].first>;
4655     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4656     LValue FlagsLVal = CGF.EmitLValueForField(
4657         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4658     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4659                           FlagsLVal);
4660     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4661       ++(*P);
4662     } else {
4663       LValue &PosLVal = *Pos.get<LValue *>();
4664       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4665       Idx = CGF.Builder.CreateNUWAdd(Idx,
4666                                      llvm::ConstantInt::get(Idx->getType(), 1));
4667       CGF.EmitStoreOfScalar(Idx, PosLVal);
4668     }
4669   }
4670 }
4671 
4672 static SmallVector<llvm::Value *, 4>
4673 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4674                         const OMPTaskDataTy::DependData &Data) {
4675   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4676          "Expected depobj dependecy kind.");
4677   SmallVector<llvm::Value *, 4> Sizes;
4678   SmallVector<LValue, 4> SizeLVals;
4679   ASTContext &C = CGF.getContext();
4680   QualType FlagsTy;
4681   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4682   RecordDecl *KmpDependInfoRD =
4683       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4684   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4685   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4686   {
4687     OMPIteratorGeneratorScope IteratorScope(
4688         CGF, cast_or_null<OMPIteratorExpr>(
4689                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4690                                    : nullptr));
4691     for (const Expr *E : Data.DepExprs) {
4692       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4693       LValue Base = CGF.EmitLoadOfPointerLValue(
4694           DepobjLVal.getAddress(CGF),
4695           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4696       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4697           Base.getAddress(CGF), KmpDependInfoPtrT);
4698       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4699                                 Base.getTBAAInfo());
4700       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4701           Addr.getPointer(),
4702           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4703       LValue NumDepsBase = CGF.MakeAddrLValue(
4704           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4705           Base.getBaseInfo(), Base.getTBAAInfo());
4706       // NumDeps = deps[i].base_addr;
4707       LValue BaseAddrLVal = CGF.EmitLValueForField(
4708           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4709       llvm::Value *NumDeps =
4710           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4711       LValue NumLVal = CGF.MakeAddrLValue(
4712           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4713           C.getUIntPtrType());
4714       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4715                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4716       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4717       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4718       CGF.EmitStoreOfScalar(Add, NumLVal);
4719       SizeLVals.push_back(NumLVal);
4720     }
4721   }
4722   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4723     llvm::Value *Size =
4724         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4725     Sizes.push_back(Size);
4726   }
4727   return Sizes;
4728 }
4729 
4730 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4731                                LValue PosLVal,
4732                                const OMPTaskDataTy::DependData &Data,
4733                                Address DependenciesArray) {
4734   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4735          "Expected depobj dependecy kind.");
4736   ASTContext &C = CGF.getContext();
4737   QualType FlagsTy;
4738   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4739   RecordDecl *KmpDependInfoRD =
4740       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4741   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4742   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4743   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4744   {
4745     OMPIteratorGeneratorScope IteratorScope(
4746         CGF, cast_or_null<OMPIteratorExpr>(
4747                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4748                                    : nullptr));
4749     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4750       const Expr *E = Data.DepExprs[I];
4751       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4752       LValue Base = CGF.EmitLoadOfPointerLValue(
4753           DepobjLVal.getAddress(CGF),
4754           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4755       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4756           Base.getAddress(CGF), KmpDependInfoPtrT);
4757       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4758                                 Base.getTBAAInfo());
4759 
4760       // Get number of elements in a single depobj.
4761       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4762           Addr.getPointer(),
4763           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4764       LValue NumDepsBase = CGF.MakeAddrLValue(
4765           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4766           Base.getBaseInfo(), Base.getTBAAInfo());
4767       // NumDeps = deps[i].base_addr;
4768       LValue BaseAddrLVal = CGF.EmitLValueForField(
4769           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4770       llvm::Value *NumDeps =
4771           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4772 
4773       // memcopy dependency data.
4774       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4775           ElSize,
4776           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4777       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4778       Address DepAddr =
4779           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4780                   DependenciesArray.getAlignment());
4781       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4782 
4783       // Increase pos.
4784       // pos += size;
4785       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4786       CGF.EmitStoreOfScalar(Add, PosLVal);
4787     }
4788   }
4789 }
4790 
4791 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4792     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4793     SourceLocation Loc) {
4794   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4795         return D.DepExprs.empty();
4796       }))
4797     return std::make_pair(nullptr, Address::invalid());
4798   // Process list of dependencies.
4799   ASTContext &C = CGM.getContext();
4800   Address DependenciesArray = Address::invalid();
4801   llvm::Value *NumOfElements = nullptr;
4802   unsigned NumDependencies = std::accumulate(
4803       Dependencies.begin(), Dependencies.end(), 0,
4804       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4805         return D.DepKind == OMPC_DEPEND_depobj
4806                    ? V
4807                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4808       });
4809   QualType FlagsTy;
4810   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4811   bool HasDepobjDeps = false;
4812   bool HasRegularWithIterators = false;
4813   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4814   llvm::Value *NumOfRegularWithIterators =
4815       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4816   // Calculate number of depobj dependecies and regular deps with the iterators.
4817   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4818     if (D.DepKind == OMPC_DEPEND_depobj) {
4819       SmallVector<llvm::Value *, 4> Sizes =
4820           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4821       for (llvm::Value *Size : Sizes) {
4822         NumOfDepobjElements =
4823             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4824       }
4825       HasDepobjDeps = true;
4826       continue;
4827     }
4828     // Include number of iterations, if any.
4829     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4830       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4831         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4832         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4833         NumOfRegularWithIterators =
4834             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4835       }
4836       HasRegularWithIterators = true;
4837       continue;
4838     }
4839   }
4840 
4841   QualType KmpDependInfoArrayTy;
4842   if (HasDepobjDeps || HasRegularWithIterators) {
4843     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4844                                            /*isSigned=*/false);
4845     if (HasDepobjDeps) {
4846       NumOfElements =
4847           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4848     }
4849     if (HasRegularWithIterators) {
4850       NumOfElements =
4851           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4852     }
4853     OpaqueValueExpr OVE(Loc,
4854                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4855                         VK_RValue);
4856     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4857                                                   RValue::get(NumOfElements));
4858     KmpDependInfoArrayTy =
4859         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4860                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4861     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4862     // Properly emit variable-sized array.
4863     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4864                                          ImplicitParamDecl::Other);
4865     CGF.EmitVarDecl(*PD);
4866     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4867     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4868                                               /*isSigned=*/false);
4869   } else {
4870     KmpDependInfoArrayTy = C.getConstantArrayType(
4871         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4872         ArrayType::Normal, /*IndexTypeQuals=*/0);
4873     DependenciesArray =
4874         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4875     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4876     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4877                                            /*isSigned=*/false);
4878   }
4879   unsigned Pos = 0;
4880   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4881     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4882         Dependencies[I].IteratorExpr)
4883       continue;
4884     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4885                    DependenciesArray);
4886   }
4887   // Copy regular dependecies with iterators.
4888   LValue PosLVal = CGF.MakeAddrLValue(
4889       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4890   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4891   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4892     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4893         !Dependencies[I].IteratorExpr)
4894       continue;
4895     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4896                    DependenciesArray);
4897   }
4898   // Copy final depobj arrays without iterators.
4899   if (HasDepobjDeps) {
4900     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4901       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4902         continue;
4903       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4904                          DependenciesArray);
4905     }
4906   }
4907   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4908       DependenciesArray, CGF.VoidPtrTy);
4909   return std::make_pair(NumOfElements, DependenciesArray);
4910 }
4911 
4912 Address CGOpenMPRuntime::emitDepobjDependClause(
4913     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4914     SourceLocation Loc) {
4915   if (Dependencies.DepExprs.empty())
4916     return Address::invalid();
4917   // Process list of dependencies.
4918   ASTContext &C = CGM.getContext();
4919   Address DependenciesArray = Address::invalid();
4920   unsigned NumDependencies = Dependencies.DepExprs.size();
4921   QualType FlagsTy;
4922   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4923   RecordDecl *KmpDependInfoRD =
4924       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4925 
4926   llvm::Value *Size;
4927   // Define type kmp_depend_info[<Dependencies.size()>];
4928   // For depobj reserve one extra element to store the number of elements.
4929   // It is required to handle depobj(x) update(in) construct.
4930   // kmp_depend_info[<Dependencies.size()>] deps;
4931   llvm::Value *NumDepsVal;
4932   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4933   if (const auto *IE =
4934           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4935     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4936     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4937       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4938       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4939       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4940     }
4941     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4942                                     NumDepsVal);
4943     CharUnits SizeInBytes =
4944         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4945     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4946     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4947     NumDepsVal =
4948         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4949   } else {
4950     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4951         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4952         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4953     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4954     Size = CGM.getSize(Sz.alignTo(Align));
4955     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4956   }
4957   // Need to allocate on the dynamic memory.
4958   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4959   // Use default allocator.
4960   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4961   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4962 
4963   llvm::Value *Addr =
4964       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4965                               CGM.getModule(), OMPRTL___kmpc_alloc),
4966                           Args, ".dep.arr.addr");
4967   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4968       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4969   DependenciesArray = Address(Addr, Align);
4970   // Write number of elements in the first element of array for depobj.
4971   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4972   // deps[i].base_addr = NumDependencies;
4973   LValue BaseAddrLVal = CGF.EmitLValueForField(
4974       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4975   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4976   llvm::PointerUnion<unsigned *, LValue *> Pos;
4977   unsigned Idx = 1;
4978   LValue PosLVal;
4979   if (Dependencies.IteratorExpr) {
4980     PosLVal = CGF.MakeAddrLValue(
4981         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4982         C.getSizeType());
4983     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4984                           /*IsInit=*/true);
4985     Pos = &PosLVal;
4986   } else {
4987     Pos = &Idx;
4988   }
4989   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4990   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4991       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4992   return DependenciesArray;
4993 }
4994 
4995 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4996                                         SourceLocation Loc) {
4997   ASTContext &C = CGM.getContext();
4998   QualType FlagsTy;
4999   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5000   LValue Base = CGF.EmitLoadOfPointerLValue(
5001       DepobjLVal.getAddress(CGF),
5002       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5003   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5004   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5005       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5006   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5007       Addr.getPointer(),
5008       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5009   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5010                                                                CGF.VoidPtrTy);
5011   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5012   // Use default allocator.
5013   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5014   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5015 
5016   // _kmpc_free(gtid, addr, nullptr);
5017   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5018                                 CGM.getModule(), OMPRTL___kmpc_free),
5019                             Args);
5020 }
5021 
5022 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5023                                        OpenMPDependClauseKind NewDepKind,
5024                                        SourceLocation Loc) {
5025   ASTContext &C = CGM.getContext();
5026   QualType FlagsTy;
5027   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5028   RecordDecl *KmpDependInfoRD =
5029       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5030   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5031   llvm::Value *NumDeps;
5032   LValue Base;
5033   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5034 
5035   Address Begin = Base.getAddress(CGF);
5036   // Cast from pointer to array type to pointer to single element.
5037   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5038   // The basic structure here is a while-do loop.
5039   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5040   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5041   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5042   CGF.EmitBlock(BodyBB);
5043   llvm::PHINode *ElementPHI =
5044       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5045   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5046   Begin = Address(ElementPHI, Begin.getAlignment());
5047   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5048                             Base.getTBAAInfo());
5049   // deps[i].flags = NewDepKind;
5050   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5051   LValue FlagsLVal = CGF.EmitLValueForField(
5052       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5053   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5054                         FlagsLVal);
5055 
5056   // Shift the address forward by one element.
5057   Address ElementNext =
5058       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5059   ElementPHI->addIncoming(ElementNext.getPointer(),
5060                           CGF.Builder.GetInsertBlock());
5061   llvm::Value *IsEmpty =
5062       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5063   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5064   // Done.
5065   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5066 }
5067 
/// Emits the runtime calls that launch an explicit task.
///
/// The task object is created with emitTaskInit and the dependence array with
/// emitDependClause. Two code generators are then prepared:
///  - "then": hands the task to the runtime via __kmpc_omp_task_with_deps
///    (dependences present) or __kmpc_omp_task (no dependences);
///  - "else": calls __kmpc_omp_wait_deps when dependences are present and then
///    invokes the task entry function directly, with
///    __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0 registered as
///    the enter/exit action of that region.
/// With an 'if' condition both generators are passed to emitIfClause; without
/// one only the "then" generator is run.
///
/// Nothing is emitted if \p CGF has no insert point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled in only when there are dependences; otherwise left uninitialized
  // and not used by the generators below.
  // NOTE(review): emitDependClause returns a null count and an invalid address
  // when every entry of Data.Dependences has an empty expression list, while
  // this guard only checks that the list itself is non-empty - confirm that
  // combination cannot occur.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No 'noalias' dependences are passed: count 0 and a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: pass the task to the runtime. The argument arrays are
  // captured by reference, so this generator must only be used inside this
  // function.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field of the task descriptor.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments of __kmpc_omp_wait_deps; same layout as DepTaskArgs but without
  // the task pointer. Filled in only when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "Else" path: wait for the dependences (if any) and call the task entry
  // function directly instead of handing the task to the runtime.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Both calls take TaskArgs; they are attached to the region as its action
    // (presumably emitted on entry to and exit from the region - see
    // CommonActionTy).
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause both paths are emitted under the condition; without
  // one only the "then" path is emitted.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5185 
5186 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5187                                        const OMPLoopDirective &D,
5188                                        llvm::Function *TaskFunction,
5189                                        QualType SharedsTy, Address Shareds,
5190                                        const Expr *IfCond,
5191                                        const OMPTaskDataTy &Data) {
5192   if (!CGF.HaveInsertPoint())
5193     return;
5194   TaskResultTy Result =
5195       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5196   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5197   // libcall.
5198   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5199   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5200   // sched, kmp_uint64 grainsize, void *task_dup);
5201   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5202   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5203   llvm::Value *IfVal;
5204   if (IfCond) {
5205     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5206                                       /*isSigned=*/true);
5207   } else {
5208     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5209   }
5210 
5211   LValue LBLVal = CGF.EmitLValueForField(
5212       Result.TDBase,
5213       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5214   const auto *LBVar =
5215       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5216   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5217                        LBLVal.getQuals(),
5218                        /*IsInitializer=*/true);
5219   LValue UBLVal = CGF.EmitLValueForField(
5220       Result.TDBase,
5221       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5222   const auto *UBVar =
5223       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5224   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5225                        UBLVal.getQuals(),
5226                        /*IsInitializer=*/true);
5227   LValue StLVal = CGF.EmitLValueForField(
5228       Result.TDBase,
5229       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5230   const auto *StVar =
5231       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5232   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5233                        StLVal.getQuals(),
5234                        /*IsInitializer=*/true);
5235   // Store reductions address.
5236   LValue RedLVal = CGF.EmitLValueForField(
5237       Result.TDBase,
5238       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5239   if (Data.Reductions) {
5240     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5241   } else {
5242     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5243                                CGF.getContext().VoidPtrTy);
5244   }
5245   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5246   llvm::Value *TaskArgs[] = {
5247       UpLoc,
5248       ThreadID,
5249       Result.NewTask,
5250       IfVal,
5251       LBLVal.getPointer(CGF),
5252       UBLVal.getPointer(CGF),
5253       CGF.EmitLoadOfScalar(StLVal, Loc),
5254       llvm::ConstantInt::getSigned(
5255           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5256       llvm::ConstantInt::getSigned(
5257           CGF.IntTy, Data.Schedule.getPointer()
5258                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5259                          : NoSchedule),
5260       Data.Schedule.getPointer()
5261           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5262                                       /*isSigned=*/false)
5263           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5264       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5265                              Result.TaskDupFn, CGF.VoidPtrTy)
5266                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5267   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5268                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5269                       TaskArgs);
5270 }
5271 
5272 /// Emit reduction operation for each element of array (required for
5273 /// array sections) LHS op = RHS.
5274 /// \param Type Type of array.
5275 /// \param LHSVar Variable on the left side of the reduction operation
5276 /// (references element of array in original variable).
5277 /// \param RHSVar Variable on the right side of the reduction operation
5278 /// (references element of array in original variable).
5279 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5280 /// RHSVar.
5281 static void EmitOMPAggregateReduction(
5282     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5283     const VarDecl *RHSVar,
5284     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5285                                   const Expr *, const Expr *)> &RedOpGen,
5286     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5287     const Expr *UpExpr = nullptr) {
5288   // Perform element-by-element initialization.
5289   QualType ElementTy;
5290   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5291   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5292 
5293   // Drill down to the base element type on both arrays.
5294   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5295   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5296 
5297   llvm::Value *RHSBegin = RHSAddr.getPointer();
5298   llvm::Value *LHSBegin = LHSAddr.getPointer();
5299   // Cast from pointer to array type to pointer to single element.
5300   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5301   // The basic structure here is a while-do loop.
5302   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5303   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5304   llvm::Value *IsEmpty =
5305       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5306   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5307 
5308   // Enter the loop body, making that address the current address.
5309   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5310   CGF.EmitBlock(BodyBB);
5311 
5312   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5313 
5314   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5315       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5316   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5317   Address RHSElementCurrent =
5318       Address(RHSElementPHI,
5319               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5320 
5321   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5322       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5323   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5324   Address LHSElementCurrent =
5325       Address(LHSElementPHI,
5326               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5327 
5328   // Emit copy.
5329   CodeGenFunction::OMPPrivateScope Scope(CGF);
5330   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5331   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5332   Scope.Privatize();
5333   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5334   Scope.ForceCleanup();
5335 
5336   // Shift the address forward by one element.
5337   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5338       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5339   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5340       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5341   // Check whether we've reached the end.
5342   llvm::Value *Done =
5343       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5344   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5345   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5346   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5347 
5348   // Done.
5349   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5350 }
5351 
5352 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5353 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5354 /// UDR combiner function.
5355 static void emitReductionCombiner(CodeGenFunction &CGF,
5356                                   const Expr *ReductionOp) {
5357   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5358     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5359       if (const auto *DRE =
5360               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5361         if (const auto *DRD =
5362                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5363           std::pair<llvm::Function *, llvm::Function *> Reduction =
5364               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5365           RValue Func = RValue::get(Reduction.first);
5366           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5367           CGF.EmitIgnoredExpr(ReductionOp);
5368           return;
5369         }
5370   CGF.EmitIgnoredExpr(ReductionOp);
5371 }
5372 
5373 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5374     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5375     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5376     ArrayRef<const Expr *> ReductionOps) {
5377   ASTContext &C = CGM.getContext();
5378 
5379   // void reduction_func(void *LHSArg, void *RHSArg);
5380   FunctionArgList Args;
5381   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5382                            ImplicitParamDecl::Other);
5383   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5384                            ImplicitParamDecl::Other);
5385   Args.push_back(&LHSArg);
5386   Args.push_back(&RHSArg);
5387   const auto &CGFI =
5388       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5389   std::string Name = getName({"omp", "reduction", "reduction_func"});
5390   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5391                                     llvm::GlobalValue::InternalLinkage, Name,
5392                                     &CGM.getModule());
5393   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5394   Fn->setDoesNotRecurse();
5395   CodeGenFunction CGF(CGM);
5396   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5397 
5398   // Dst = (void*[n])(LHSArg);
5399   // Src = (void*[n])(RHSArg);
5400   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5401       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5402       ArgsType), CGF.getPointerAlign());
5403   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5404       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5405       ArgsType), CGF.getPointerAlign());
5406 
5407   //  ...
5408   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5409   //  ...
5410   CodeGenFunction::OMPPrivateScope Scope(CGF);
5411   auto IPriv = Privates.begin();
5412   unsigned Idx = 0;
5413   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5414     const auto *RHSVar =
5415         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5416     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5417       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5418     });
5419     const auto *LHSVar =
5420         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5421     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5422       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5423     });
5424     QualType PrivTy = (*IPriv)->getType();
5425     if (PrivTy->isVariablyModifiedType()) {
5426       // Get array size and emit VLA type.
5427       ++Idx;
5428       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5429       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5430       const VariableArrayType *VLA =
5431           CGF.getContext().getAsVariableArrayType(PrivTy);
5432       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5433       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5434           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5435       CGF.EmitVariablyModifiedType(PrivTy);
5436     }
5437   }
5438   Scope.Privatize();
5439   IPriv = Privates.begin();
5440   auto ILHS = LHSExprs.begin();
5441   auto IRHS = RHSExprs.begin();
5442   for (const Expr *E : ReductionOps) {
5443     if ((*IPriv)->getType()->isArrayType()) {
5444       // Emit reduction for array section.
5445       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5446       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5447       EmitOMPAggregateReduction(
5448           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5449           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5450             emitReductionCombiner(CGF, E);
5451           });
5452     } else {
5453       // Emit reduction for array subscript or single variable.
5454       emitReductionCombiner(CGF, E);
5455     }
5456     ++IPriv;
5457     ++ILHS;
5458     ++IRHS;
5459   }
5460   Scope.ForceCleanup();
5461   CGF.FinishFunction();
5462   return Fn;
5463 }
5464 
5465 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5466                                                   const Expr *ReductionOp,
5467                                                   const Expr *PrivateRef,
5468                                                   const DeclRefExpr *LHS,
5469                                                   const DeclRefExpr *RHS) {
5470   if (PrivateRef->getType()->isArrayType()) {
5471     // Emit reduction for array section.
5472     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5473     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5474     EmitOMPAggregateReduction(
5475         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5476         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5477           emitReductionCombiner(CGF, ReductionOp);
5478         });
5479   } else {
5480     // Emit reduction for array subscript or single variable.
5481     emitReductionCombiner(CGF, ReductionOp);
5482   }
5483 }
5484 
5485 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5486                                     ArrayRef<const Expr *> Privates,
5487                                     ArrayRef<const Expr *> LHSExprs,
5488                                     ArrayRef<const Expr *> RHSExprs,
5489                                     ArrayRef<const Expr *> ReductionOps,
5490                                     ReductionOptionsTy Options) {
5491   if (!CGF.HaveInsertPoint())
5492     return;
5493 
5494   bool WithNowait = Options.WithNowait;
5495   bool SimpleReduction = Options.SimpleReduction;
5496 
5497   // Next code should be emitted for reduction:
5498   //
5499   // static kmp_critical_name lock = { 0 };
5500   //
5501   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5502   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5503   //  ...
5504   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5505   //  *(Type<n>-1*)rhs[<n>-1]);
5506   // }
5507   //
5508   // ...
5509   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5510   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5511   // RedList, reduce_func, &<lock>)) {
5512   // case 1:
5513   //  ...
5514   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5515   //  ...
5516   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5517   // break;
5518   // case 2:
5519   //  ...
5520   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5521   //  ...
5522   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5523   // break;
5524   // default:;
5525   // }
5526   //
5527   // if SimpleReduction is true, only the next code is generated:
5528   //  ...
5529   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5530   //  ...
5531 
5532   ASTContext &C = CGM.getContext();
5533 
5534   if (SimpleReduction) {
5535     CodeGenFunction::RunCleanupsScope Scope(CGF);
5536     auto IPriv = Privates.begin();
5537     auto ILHS = LHSExprs.begin();
5538     auto IRHS = RHSExprs.begin();
5539     for (const Expr *E : ReductionOps) {
5540       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5541                                   cast<DeclRefExpr>(*IRHS));
5542       ++IPriv;
5543       ++ILHS;
5544       ++IRHS;
5545     }
5546     return;
5547   }
5548 
5549   // 1. Build a list of reduction variables.
5550   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5551   auto Size = RHSExprs.size();
5552   for (const Expr *E : Privates) {
5553     if (E->getType()->isVariablyModifiedType())
5554       // Reserve place for array size.
5555       ++Size;
5556   }
5557   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5558   QualType ReductionArrayTy =
5559       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5560                              /*IndexTypeQuals=*/0);
5561   Address ReductionList =
5562       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5563   auto IPriv = Privates.begin();
5564   unsigned Idx = 0;
5565   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5566     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5567     CGF.Builder.CreateStore(
5568         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5569             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5570         Elem);
5571     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5572       // Store array size.
5573       ++Idx;
5574       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5575       llvm::Value *Size = CGF.Builder.CreateIntCast(
5576           CGF.getVLASize(
5577                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5578               .NumElts,
5579           CGF.SizeTy, /*isSigned=*/false);
5580       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5581                               Elem);
5582     }
5583   }
5584 
5585   // 2. Emit reduce_func().
5586   llvm::Function *ReductionFn = emitReductionFunction(
5587       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5588       LHSExprs, RHSExprs, ReductionOps);
5589 
5590   // 3. Create static kmp_critical_name lock = { 0 };
5591   std::string Name = getName({"reduction"});
5592   llvm::Value *Lock = getCriticalRegionLock(Name);
5593 
5594   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5595   // RedList, reduce_func, &<lock>);
5596   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5597   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5598   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5599   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5600       ReductionList.getPointer(), CGF.VoidPtrTy);
5601   llvm::Value *Args[] = {
5602       IdentTLoc,                             // ident_t *<loc>
5603       ThreadId,                              // i32 <gtid>
5604       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5605       ReductionArrayTySize,                  // size_type sizeof(RedList)
5606       RL,                                    // void *RedList
5607       ReductionFn, // void (*) (void *, void *) <reduce_func>
5608       Lock         // kmp_critical_name *&<lock>
5609   };
5610   llvm::Value *Res = CGF.EmitRuntimeCall(
5611       OMPBuilder.getOrCreateRuntimeFunction(
5612           CGM.getModule(),
5613           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5614       Args);
5615 
5616   // 5. Build switch(res)
5617   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5618   llvm::SwitchInst *SwInst =
5619       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5620 
5621   // 6. Build case 1:
5622   //  ...
5623   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5624   //  ...
5625   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5626   // break;
5627   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5628   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5629   CGF.EmitBlock(Case1BB);
5630 
5631   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5632   llvm::Value *EndArgs[] = {
5633       IdentTLoc, // ident_t *<loc>
5634       ThreadId,  // i32 <gtid>
5635       Lock       // kmp_critical_name *&<lock>
5636   };
5637   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5638                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5639     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5640     auto IPriv = Privates.begin();
5641     auto ILHS = LHSExprs.begin();
5642     auto IRHS = RHSExprs.begin();
5643     for (const Expr *E : ReductionOps) {
5644       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5645                                      cast<DeclRefExpr>(*IRHS));
5646       ++IPriv;
5647       ++ILHS;
5648       ++IRHS;
5649     }
5650   };
5651   RegionCodeGenTy RCG(CodeGen);
5652   CommonActionTy Action(
5653       nullptr, llvm::None,
5654       OMPBuilder.getOrCreateRuntimeFunction(
5655           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5656                                       : OMPRTL___kmpc_end_reduce),
5657       EndArgs);
5658   RCG.setAction(Action);
5659   RCG(CGF);
5660 
5661   CGF.EmitBranch(DefaultBB);
5662 
5663   // 7. Build case 2:
5664   //  ...
5665   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5666   //  ...
5667   // break;
5668   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5669   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5670   CGF.EmitBlock(Case2BB);
5671 
5672   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5673                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5674     auto ILHS = LHSExprs.begin();
5675     auto IRHS = RHSExprs.begin();
5676     auto IPriv = Privates.begin();
5677     for (const Expr *E : ReductionOps) {
5678       const Expr *XExpr = nullptr;
5679       const Expr *EExpr = nullptr;
5680       const Expr *UpExpr = nullptr;
5681       BinaryOperatorKind BO = BO_Comma;
5682       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5683         if (BO->getOpcode() == BO_Assign) {
5684           XExpr = BO->getLHS();
5685           UpExpr = BO->getRHS();
5686         }
5687       }
5688       // Try to emit update expression as a simple atomic.
5689       const Expr *RHSExpr = UpExpr;
5690       if (RHSExpr) {
5691         // Analyze RHS part of the whole expression.
5692         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5693                 RHSExpr->IgnoreParenImpCasts())) {
5694           // If this is a conditional operator, analyze its condition for
5695           // min/max reduction operator.
5696           RHSExpr = ACO->getCond();
5697         }
5698         if (const auto *BORHS =
5699                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5700           EExpr = BORHS->getRHS();
5701           BO = BORHS->getOpcode();
5702         }
5703       }
5704       if (XExpr) {
5705         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5706         auto &&AtomicRedGen = [BO, VD,
5707                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5708                                     const Expr *EExpr, const Expr *UpExpr) {
5709           LValue X = CGF.EmitLValue(XExpr);
5710           RValue E;
5711           if (EExpr)
5712             E = CGF.EmitAnyExpr(EExpr);
5713           CGF.EmitOMPAtomicSimpleUpdateExpr(
5714               X, E, BO, /*IsXLHSInRHSPart=*/true,
5715               llvm::AtomicOrdering::Monotonic, Loc,
5716               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5717                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5718                 PrivateScope.addPrivate(
5719                     VD, [&CGF, VD, XRValue, Loc]() {
5720                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5721                       CGF.emitOMPSimpleStore(
5722                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5723                           VD->getType().getNonReferenceType(), Loc);
5724                       return LHSTemp;
5725                     });
5726                 (void)PrivateScope.Privatize();
5727                 return CGF.EmitAnyExpr(UpExpr);
5728               });
5729         };
5730         if ((*IPriv)->getType()->isArrayType()) {
5731           // Emit atomic reduction for array section.
5732           const auto *RHSVar =
5733               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5734           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5735                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5736         } else {
5737           // Emit atomic reduction for array subscript or single variable.
5738           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5739         }
5740       } else {
5741         // Emit as a critical region.
5742         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5743                                            const Expr *, const Expr *) {
5744           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5745           std::string Name = RT.getName({"atomic_reduction"});
5746           RT.emitCriticalRegion(
5747               CGF, Name,
5748               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5749                 Action.Enter(CGF);
5750                 emitReductionCombiner(CGF, E);
5751               },
5752               Loc);
5753         };
5754         if ((*IPriv)->getType()->isArrayType()) {
5755           const auto *LHSVar =
5756               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5757           const auto *RHSVar =
5758               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5759           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5760                                     CritRedGen);
5761         } else {
5762           CritRedGen(CGF, nullptr, nullptr, nullptr);
5763         }
5764       }
5765       ++ILHS;
5766       ++IRHS;
5767       ++IPriv;
5768     }
5769   };
5770   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5771   if (!WithNowait) {
5772     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5773     llvm::Value *EndArgs[] = {
5774         IdentTLoc, // ident_t *<loc>
5775         ThreadId,  // i32 <gtid>
5776         Lock       // kmp_critical_name *&<lock>
5777     };
5778     CommonActionTy Action(nullptr, llvm::None,
5779                           OMPBuilder.getOrCreateRuntimeFunction(
5780                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5781                           EndArgs);
5782     AtomicRCG.setAction(Action);
5783     AtomicRCG(CGF);
5784   } else {
5785     AtomicRCG(CGF);
5786   }
5787 
5788   CGF.EmitBranch(DefaultBB);
5789   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5790 }
5791 
5792 /// Generates unique name for artificial threadprivate variables.
5793 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5794 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5795                                       const Expr *Ref) {
5796   SmallString<256> Buffer;
5797   llvm::raw_svector_ostream Out(Buffer);
5798   const clang::DeclRefExpr *DE;
5799   const VarDecl *D = ::getBaseDecl(Ref, DE);
5800   if (!D)
5801     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5802   D = D->getCanonicalDecl();
5803   std::string Name = CGM.getOpenMPRuntime().getName(
5804       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5805   Out << Prefix << Name << "_"
5806       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5807   return std::string(Out.str());
5808 }
5809 
5810 /// Emits reduction initializer function:
5811 /// \code
5812 /// void @.red_init(void* %arg, void* %orig) {
5813 /// %0 = bitcast void* %arg to <type>*
5814 /// store <type> <init>, <type>* %0
5815 /// ret void
5816 /// }
5817 /// \endcode
5818 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5819                                            SourceLocation Loc,
5820                                            ReductionCodeGen &RCG, unsigned N) {
5821   ASTContext &C = CGM.getContext();
5822   QualType VoidPtrTy = C.VoidPtrTy;
5823   VoidPtrTy.addRestrict();
5824   FunctionArgList Args;
5825   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5826                           ImplicitParamDecl::Other);
5827   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5828                               ImplicitParamDecl::Other);
5829   Args.emplace_back(&Param);
5830   Args.emplace_back(&ParamOrig);
5831   const auto &FnInfo =
5832       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5833   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5834   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5835   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5836                                     Name, &CGM.getModule());
5837   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5838   Fn->setDoesNotRecurse();
5839   CodeGenFunction CGF(CGM);
5840   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5841   Address PrivateAddr = CGF.EmitLoadOfPointer(
5842       CGF.GetAddrOfLocalVar(&Param),
5843       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5844   llvm::Value *Size = nullptr;
5845   // If the size of the reduction item is non-constant, load it from global
5846   // threadprivate variable.
5847   if (RCG.getSizes(N).second) {
5848     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5849         CGF, CGM.getContext().getSizeType(),
5850         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5851     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5852                                 CGM.getContext().getSizeType(), Loc);
5853   }
5854   RCG.emitAggregateType(CGF, N, Size);
5855   LValue OrigLVal;
5856   // If initializer uses initializer from declare reduction construct, emit a
5857   // pointer to the address of the original reduction item (reuired by reduction
5858   // initializer)
5859   if (RCG.usesReductionInitializer(N)) {
5860     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5861     SharedAddr = CGF.EmitLoadOfPointer(
5862         SharedAddr,
5863         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5864     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5865   } else {
5866     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5867         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5868         CGM.getContext().VoidPtrTy);
5869   }
5870   // Emit the initializer:
5871   // %0 = bitcast void* %arg to <type>*
5872   // store <type> <init>, <type>* %0
5873   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5874                          [](CodeGenFunction &) { return false; });
5875   CGF.FinishFunction();
5876   return Fn;
5877 }
5878 
5879 /// Emits reduction combiner function:
5880 /// \code
5881 /// void @.red_comb(void* %arg0, void* %arg1) {
5882 /// %lhs = bitcast void* %arg0 to <type>*
5883 /// %rhs = bitcast void* %arg1 to <type>*
5884 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5885 /// store <type> %2, <type>* %lhs
5886 /// ret void
5887 /// }
5888 /// \endcode
5889 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5890                                            SourceLocation Loc,
5891                                            ReductionCodeGen &RCG, unsigned N,
5892                                            const Expr *ReductionOp,
5893                                            const Expr *LHS, const Expr *RHS,
5894                                            const Expr *PrivateRef) {
5895   ASTContext &C = CGM.getContext();
5896   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5897   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5898   FunctionArgList Args;
5899   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5900                                C.VoidPtrTy, ImplicitParamDecl::Other);
5901   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5902                             ImplicitParamDecl::Other);
5903   Args.emplace_back(&ParamInOut);
5904   Args.emplace_back(&ParamIn);
5905   const auto &FnInfo =
5906       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5907   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5908   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5909   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5910                                     Name, &CGM.getModule());
5911   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5912   Fn->setDoesNotRecurse();
5913   CodeGenFunction CGF(CGM);
5914   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5915   llvm::Value *Size = nullptr;
5916   // If the size of the reduction item is non-constant, load it from global
5917   // threadprivate variable.
5918   if (RCG.getSizes(N).second) {
5919     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5920         CGF, CGM.getContext().getSizeType(),
5921         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5922     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5923                                 CGM.getContext().getSizeType(), Loc);
5924   }
5925   RCG.emitAggregateType(CGF, N, Size);
5926   // Remap lhs and rhs variables to the addresses of the function arguments.
5927   // %lhs = bitcast void* %arg0 to <type>*
5928   // %rhs = bitcast void* %arg1 to <type>*
5929   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5930   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5931     // Pull out the pointer to the variable.
5932     Address PtrAddr = CGF.EmitLoadOfPointer(
5933         CGF.GetAddrOfLocalVar(&ParamInOut),
5934         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5935     return CGF.Builder.CreateElementBitCast(
5936         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5937   });
5938   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5939     // Pull out the pointer to the variable.
5940     Address PtrAddr = CGF.EmitLoadOfPointer(
5941         CGF.GetAddrOfLocalVar(&ParamIn),
5942         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5943     return CGF.Builder.CreateElementBitCast(
5944         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5945   });
5946   PrivateScope.Privatize();
5947   // Emit the combiner body:
5948   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5949   // store <type> %2, <type>* %lhs
5950   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5951       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5952       cast<DeclRefExpr>(RHS));
5953   CGF.FinishFunction();
5954   return Fn;
5955 }
5956 
5957 /// Emits reduction finalizer function:
5958 /// \code
5959 /// void @.red_fini(void* %arg) {
5960 /// %0 = bitcast void* %arg to <type>*
5961 /// <destroy>(<type>* %0)
5962 /// ret void
5963 /// }
5964 /// \endcode
5965 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5966                                            SourceLocation Loc,
5967                                            ReductionCodeGen &RCG, unsigned N) {
5968   if (!RCG.needCleanups(N))
5969     return nullptr;
5970   ASTContext &C = CGM.getContext();
5971   FunctionArgList Args;
5972   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5973                           ImplicitParamDecl::Other);
5974   Args.emplace_back(&Param);
5975   const auto &FnInfo =
5976       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5977   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5978   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5979   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5980                                     Name, &CGM.getModule());
5981   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5982   Fn->setDoesNotRecurse();
5983   CodeGenFunction CGF(CGM);
5984   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5985   Address PrivateAddr = CGF.EmitLoadOfPointer(
5986       CGF.GetAddrOfLocalVar(&Param),
5987       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5988   llvm::Value *Size = nullptr;
5989   // If the size of the reduction item is non-constant, load it from global
5990   // threadprivate variable.
5991   if (RCG.getSizes(N).second) {
5992     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5993         CGF, CGM.getContext().getSizeType(),
5994         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5995     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5996                                 CGM.getContext().getSizeType(), Loc);
5997   }
5998   RCG.emitAggregateType(CGF, N, Size);
5999   // Emit the finalizer body:
6000   // <destroy>(<type>* %0)
6001   RCG.emitCleanups(CGF, N, PrivateAddr);
6002   CGF.FinishFunction(Loc);
6003   return Fn;
6004 }
6005 
/// Emits the initialization of task reduction data for the items described by
/// \p Data and returns the result of the emitted runtime call.
///
/// A local array of kmp_taskred_input_t descriptors, one per entry of
/// Data.ReductionVars, is created and filled in. The array is then passed to
/// __kmpc_taskred_modifier_init when Data.IsReductionWithTaskMod is set, or to
/// __kmpc_taskred_init otherwise.
///
/// \p LHSExprs and \p RHSExprs are indexed in parallel with
/// Data.ReductionVars and forwarded to emitReduceCombFunction.
///
/// \returns the value of the emitted runtime call, or nullptr when there is no
/// insertion point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields below are added in exactly the order listed above.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is an unsigned 32-bit integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // NOTE(review): presumably this populates the shared/original lvalues that
    // getSharedLValue/getOrigLValue return below, so keep it ahead of them.
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    // NOTE(review): getSizes is queried right after emitAggregateType;
    // presumably the sizes are computed by that call.
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null second size value marks such an item.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, converted to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null pointer is stored when no finalizer function was produced.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6134 
6135 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6136                                             SourceLocation Loc,
6137                                             bool IsWorksharingReduction) {
6138   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6139   // is_ws, int num, void *data);
6140   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6141   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6142                                                 CGM.IntTy, /*isSigned=*/true);
6143   llvm::Value *Args[] = {IdentTLoc, GTid,
6144                          llvm::ConstantInt::get(CGM.IntTy,
6145                                                 IsWorksharingReduction ? 1 : 0,
6146                                                 /*isSigned=*/true)};
6147   (void)CGF.EmitRuntimeCall(
6148       OMPBuilder.getOrCreateRuntimeFunction(
6149           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6150       Args);
6151 }
6152 
6153 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6154                                               SourceLocation Loc,
6155                                               ReductionCodeGen &RCG,
6156                                               unsigned N) {
6157   auto Sizes = RCG.getSizes(N);
6158   // Emit threadprivate global variable if the type is non-constant
6159   // (Sizes.second = nullptr).
6160   if (Sizes.second) {
6161     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6162                                                      /*isSigned=*/false);
6163     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6164         CGF, CGM.getContext().getSizeType(),
6165         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6166     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6167   }
6168 }
6169 
6170 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6171                                               SourceLocation Loc,
6172                                               llvm::Value *ReductionsPtr,
6173                                               LValue SharedLVal) {
6174   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6175   // *d);
6176   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6177                                                    CGM.IntTy,
6178                                                    /*isSigned=*/true),
6179                          ReductionsPtr,
6180                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6181                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6182   return Address(
6183       CGF.EmitRuntimeCall(
6184           OMPBuilder.getOrCreateRuntimeFunction(
6185               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6186           Args),
6187       SharedLVal.getAlignment());
6188 }
6189 
6190 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6191                                        SourceLocation Loc) {
6192   if (!CGF.HaveInsertPoint())
6193     return;
6194 
6195   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6196     OMPBuilder.createTaskwait(CGF.Builder);
6197   } else {
6198     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6199     // global_tid);
6200     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6201     // Ignore return result until untied tasks are supported.
6202     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6203                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6204                         Args);
6205   }
6206 
6207   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6208     Region->emitUntiedSwitch(CGF);
6209 }
6210 
6211 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6212                                            OpenMPDirectiveKind InnerKind,
6213                                            const RegionCodeGenTy &CodeGen,
6214                                            bool HasCancel) {
6215   if (!CGF.HaveInsertPoint())
6216     return;
6217   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6218   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6219 }
6220 
namespace {
/// Cancellation kind codes. The value produced by getCancellationKind is
/// passed as the cncl_kind argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls emitted in this file.
enum RTCancelKind {
  /// Initial value in getCancellationKind before a kind is selected.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6230 
6231 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6232   RTCancelKind CancelKind = CancelNoreq;
6233   if (CancelRegion == OMPD_parallel)
6234     CancelKind = CancelParallel;
6235   else if (CancelRegion == OMPD_for)
6236     CancelKind = CancelLoop;
6237   else if (CancelRegion == OMPD_sections)
6238     CancelKind = CancelSections;
6239   else {
6240     assert(CancelRegion == OMPD_taskgroup);
6241     CancelKind = CancelTaskgroup;
6242   }
6243   return CancelKind;
6244 }
6245 
6246 void CGOpenMPRuntime::emitCancellationPointCall(
6247     CodeGenFunction &CGF, SourceLocation Loc,
6248     OpenMPDirectiveKind CancelRegion) {
6249   if (!CGF.HaveInsertPoint())
6250     return;
6251   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6252   // global_tid, kmp_int32 cncl_kind);
6253   if (auto *OMPRegionInfo =
6254           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6255     // For 'cancellation point taskgroup', the task region info may not have a
6256     // cancel. This may instead happen in another adjacent task.
6257     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6258       llvm::Value *Args[] = {
6259           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6260           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6261       // Ignore return result until untied tasks are supported.
6262       llvm::Value *Result = CGF.EmitRuntimeCall(
6263           OMPBuilder.getOrCreateRuntimeFunction(
6264               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6265           Args);
6266       // if (__kmpc_cancellationpoint()) {
6267       //   exit from construct;
6268       // }
6269       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6270       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6271       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6272       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6273       CGF.EmitBlock(ExitBB);
6274       // exit from construct;
6275       CodeGenFunction::JumpDest CancelDest =
6276           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6277       CGF.EmitBranchThroughCleanup(CancelDest);
6278       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6279     }
6280   }
6281 }
6282 
6283 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6284                                      const Expr *IfCond,
6285                                      OpenMPDirectiveKind CancelRegion) {
6286   if (!CGF.HaveInsertPoint())
6287     return;
6288   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6289   // kmp_int32 cncl_kind);
6290   auto &M = CGM.getModule();
6291   if (auto *OMPRegionInfo =
6292           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6293     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6294                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6295       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6296       llvm::Value *Args[] = {
6297           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6298           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6299       // Ignore return result until untied tasks are supported.
6300       llvm::Value *Result = CGF.EmitRuntimeCall(
6301           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6302       // if (__kmpc_cancel()) {
6303       //   exit from construct;
6304       // }
6305       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6306       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6307       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6308       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6309       CGF.EmitBlock(ExitBB);
6310       // exit from construct;
6311       CodeGenFunction::JumpDest CancelDest =
6312           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6313       CGF.EmitBranchThroughCleanup(CancelDest);
6314       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6315     };
6316     if (IfCond) {
6317       emitIfClause(CGF, IfCond, ThenGen,
6318                    [](CodeGenFunction &, PrePostActionTy &) {});
6319     } else {
6320       RegionCodeGenTy ThenRCG(ThenGen);
6321       ThenRCG(CGF);
6322     }
6323   }
6324 }
6325 
6326 namespace {
6327 /// Cleanup action for uses_allocators support.
6328 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6329   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6330 
6331 public:
6332   OMPUsesAllocatorsActionTy(
6333       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6334       : Allocators(Allocators) {}
6335   void Enter(CodeGenFunction &CGF) override {
6336     if (!CGF.HaveInsertPoint())
6337       return;
6338     for (const auto &AllocatorData : Allocators) {
6339       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6340           CGF, AllocatorData.first, AllocatorData.second);
6341     }
6342   }
6343   void Exit(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6348                                                         AllocatorData.first);
6349     }
6350   }
6351 };
6352 } // namespace
6353 
6354 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6355     const OMPExecutableDirective &D, StringRef ParentName,
6356     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6357     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6358   assert(!ParentName.empty() && "Invalid target region parent name!");
6359   HasEmittedTargetRegion = true;
6360   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6361   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6362     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6363       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6364       if (!D.AllocatorTraits)
6365         continue;
6366       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6367     }
6368   }
6369   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6370   CodeGen.setAction(UsesAllocatorAction);
6371   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6372                                    IsOffloadEntry, CodeGen);
6373 }
6374 
6375 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6376                                              const Expr *Allocator,
6377                                              const Expr *AllocatorTraits) {
6378   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6379   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6380   // Use default memspace handle.
6381   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6382   llvm::Value *NumTraits = llvm::ConstantInt::get(
6383       CGF.IntTy, cast<ConstantArrayType>(
6384                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6385                      ->getSize()
6386                      .getLimitedValue());
6387   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6388   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6389       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6390   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6391                                            AllocatorTraitsLVal.getBaseInfo(),
6392                                            AllocatorTraitsLVal.getTBAAInfo());
6393   llvm::Value *Traits =
6394       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6395 
6396   llvm::Value *AllocatorVal =
6397       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6398                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6399                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6400   // Store to allocator.
6401   CGF.EmitVarDecl(*cast<VarDecl>(
6402       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6403   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6404   AllocatorVal =
6405       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6406                                Allocator->getType(), Allocator->getExprLoc());
6407   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6408 }
6409 
6410 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6411                                              const Expr *Allocator) {
6412   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6413   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6414   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6415   llvm::Value *AllocatorVal =
6416       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6417   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6418                                           CGF.getContext().VoidPtrTy,
6419                                           Allocator->getExprLoc());
6420   (void)CGF.EmitRuntimeCall(
6421       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6422                                             OMPRTL___kmpc_destroy_allocator),
6423       {ThreadId, AllocatorVal});
6424 }
6425 
6426 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6427     const OMPExecutableDirective &D, StringRef ParentName,
6428     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6429     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6430   // Create a unique name for the entry function using the source location
6431   // information of the current target region. The name will be something like:
6432   //
6433   // __omp_offloading_DD_FFFF_PP_lBB
6434   //
6435   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6436   // mangled name of the function that encloses the target region and BB is the
6437   // line number of the target region.
6438 
6439   unsigned DeviceID;
6440   unsigned FileID;
6441   unsigned Line;
6442   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6443                            Line);
6444   SmallString<64> EntryFnName;
6445   {
6446     llvm::raw_svector_ostream OS(EntryFnName);
6447     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6448        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6449   }
6450 
6451   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6452 
6453   CodeGenFunction CGF(CGM, true);
6454   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6455   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6456 
6457   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6458 
6459   // If this target outline function is not an offload entry, we don't need to
6460   // register it.
6461   if (!IsOffloadEntry)
6462     return;
6463 
6464   // The target region ID is used by the runtime library to identify the current
6465   // target region, so it only has to be unique and not necessarily point to
6466   // anything. It could be the pointer to the outlined function that implements
6467   // the target region, but we aren't using that so that the compiler doesn't
6468   // need to keep that, and could therefore inline the host function if proven
6469   // worthwhile during optimization. In the other hand, if emitting code for the
6470   // device, the ID has to be the function address so that it can retrieved from
6471   // the offloading entry and launched by the runtime library. We also mark the
6472   // outlined function to have external linkage in case we are emitting code for
6473   // the device, because these functions will be entry points to the device.
6474 
6475   if (CGM.getLangOpts().OpenMPIsDevice) {
6476     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6477     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6478     OutlinedFn->setDSOLocal(false);
6479     if (CGM.getTriple().isAMDGCN())
6480       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6481   } else {
6482     std::string Name = getName({EntryFnName, "region_id"});
6483     OutlinedFnID = new llvm::GlobalVariable(
6484         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6485         llvm::GlobalValue::WeakAnyLinkage,
6486         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6487   }
6488 
6489   // Register the information for the entry associated with this target region.
6490   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6491       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6492       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6493 }
6494 
6495 /// Checks if the expression is constant or does not have non-trivial function
6496 /// calls.
6497 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6498   // We can skip constant expressions.
6499   // We can skip expressions with trivial calls or simple expressions.
6500   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6501           !E->hasNonTrivialCall(Ctx)) &&
6502          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6503 }
6504 
6505 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6506                                                     const Stmt *Body) {
6507   const Stmt *Child = Body->IgnoreContainers();
6508   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6509     Child = nullptr;
6510     for (const Stmt *S : C->body()) {
6511       if (const auto *E = dyn_cast<Expr>(S)) {
6512         if (isTrivial(Ctx, E))
6513           continue;
6514       }
6515       // Some of the statements can be ignored.
6516       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6517           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6518         continue;
6519       // Analyze declarations.
6520       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6521         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6522               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6523                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6524                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6525                   isa<UsingDirectiveDecl>(D) ||
6526                   isa<OMPDeclareReductionDecl>(D) ||
6527                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6528                 return true;
6529               const auto *VD = dyn_cast<VarDecl>(D);
6530               if (!VD)
6531                 return false;
6532               return VD->isConstexpr() ||
6533                      ((VD->getType().isTrivialType(Ctx) ||
6534                        VD->getType()->isReferenceType()) &&
6535                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6536             }))
6537           continue;
6538       }
6539       // Found multiple children - cannot get the one child only.
6540       if (Child)
6541         return nullptr;
6542       Child = S;
6543     }
6544     if (Child)
6545       Child = Child->IgnoreContainers();
6546   }
6547   return Child;
6548 }
6549 
6550 /// Emit the number of teams for a target directive.  Inspect the num_teams
6551 /// clause associated with a teams construct combined or closely nested
6552 /// with the target directive.
6553 ///
6554 /// Emit a team of size one for directives such as 'target parallel' that
6555 /// have no associated teams construct.
6556 ///
6557 /// Otherwise, return nullptr.
6558 static llvm::Value *
6559 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6560                                const OMPExecutableDirective &D) {
6561   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6562          "Clauses associated with the teams directive expected to be emitted "
6563          "only for the host!");
6564   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6565   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6566          "Expected target-based executable directive.");
6567   CGBuilderTy &Bld = CGF.Builder;
6568   switch (DirectiveKind) {
6569   case OMPD_target: {
6570     const auto *CS = D.getInnermostCapturedStmt();
6571     const auto *Body =
6572         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6573     const Stmt *ChildStmt =
6574         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6575     if (const auto *NestedDir =
6576             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6577       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6578         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6579           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6580           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6581           const Expr *NumTeams =
6582               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6583           llvm::Value *NumTeamsVal =
6584               CGF.EmitScalarExpr(NumTeams,
6585                                  /*IgnoreResultAssign*/ true);
6586           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6587                                    /*isSigned=*/true);
6588         }
6589         return Bld.getInt32(0);
6590       }
6591       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6592           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6593         return Bld.getInt32(1);
6594       return Bld.getInt32(0);
6595     }
6596     return nullptr;
6597   }
6598   case OMPD_target_teams:
6599   case OMPD_target_teams_distribute:
6600   case OMPD_target_teams_distribute_simd:
6601   case OMPD_target_teams_distribute_parallel_for:
6602   case OMPD_target_teams_distribute_parallel_for_simd: {
6603     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6604       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6605       const Expr *NumTeams =
6606           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6607       llvm::Value *NumTeamsVal =
6608           CGF.EmitScalarExpr(NumTeams,
6609                              /*IgnoreResultAssign*/ true);
6610       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6611                                /*isSigned=*/true);
6612     }
6613     return Bld.getInt32(0);
6614   }
6615   case OMPD_target_parallel:
6616   case OMPD_target_parallel_for:
6617   case OMPD_target_parallel_for_simd:
6618   case OMPD_target_simd:
6619     return Bld.getInt32(1);
6620   case OMPD_parallel:
6621   case OMPD_for:
6622   case OMPD_parallel_for:
6623   case OMPD_parallel_master:
6624   case OMPD_parallel_sections:
6625   case OMPD_for_simd:
6626   case OMPD_parallel_for_simd:
6627   case OMPD_cancel:
6628   case OMPD_cancellation_point:
6629   case OMPD_ordered:
6630   case OMPD_threadprivate:
6631   case OMPD_allocate:
6632   case OMPD_task:
6633   case OMPD_simd:
6634   case OMPD_tile:
6635   case OMPD_sections:
6636   case OMPD_section:
6637   case OMPD_single:
6638   case OMPD_master:
6639   case OMPD_critical:
6640   case OMPD_taskyield:
6641   case OMPD_barrier:
6642   case OMPD_taskwait:
6643   case OMPD_taskgroup:
6644   case OMPD_atomic:
6645   case OMPD_flush:
6646   case OMPD_depobj:
6647   case OMPD_scan:
6648   case OMPD_teams:
6649   case OMPD_target_data:
6650   case OMPD_target_exit_data:
6651   case OMPD_target_enter_data:
6652   case OMPD_distribute:
6653   case OMPD_distribute_simd:
6654   case OMPD_distribute_parallel_for:
6655   case OMPD_distribute_parallel_for_simd:
6656   case OMPD_teams_distribute:
6657   case OMPD_teams_distribute_simd:
6658   case OMPD_teams_distribute_parallel_for:
6659   case OMPD_teams_distribute_parallel_for_simd:
6660   case OMPD_target_update:
6661   case OMPD_declare_simd:
6662   case OMPD_declare_variant:
6663   case OMPD_begin_declare_variant:
6664   case OMPD_end_declare_variant:
6665   case OMPD_declare_target:
6666   case OMPD_end_declare_target:
6667   case OMPD_declare_reduction:
6668   case OMPD_declare_mapper:
6669   case OMPD_taskloop:
6670   case OMPD_taskloop_simd:
6671   case OMPD_master_taskloop:
6672   case OMPD_master_taskloop_simd:
6673   case OMPD_parallel_master_taskloop:
6674   case OMPD_parallel_master_taskloop_simd:
6675   case OMPD_requires:
6676   case OMPD_unknown:
6677     break;
6678   default:
6679     break;
6680   }
6681   llvm_unreachable("Unexpected directive kind.");
6682 }
6683 
6684 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6685                                   llvm::Value *DefaultThreadLimitVal) {
6686   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6687       CGF.getContext(), CS->getCapturedStmt());
6688   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6689     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6690       llvm::Value *NumThreads = nullptr;
6691       llvm::Value *CondVal = nullptr;
6692       // Handle if clause. If if clause present, the number of threads is
6693       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6694       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6695         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6696         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6697         const OMPIfClause *IfClause = nullptr;
6698         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6699           if (C->getNameModifier() == OMPD_unknown ||
6700               C->getNameModifier() == OMPD_parallel) {
6701             IfClause = C;
6702             break;
6703           }
6704         }
6705         if (IfClause) {
6706           const Expr *Cond = IfClause->getCondition();
6707           bool Result;
6708           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6709             if (!Result)
6710               return CGF.Builder.getInt32(1);
6711           } else {
6712             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6713             if (const auto *PreInit =
6714                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6715               for (const auto *I : PreInit->decls()) {
6716                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6717                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6718                 } else {
6719                   CodeGenFunction::AutoVarEmission Emission =
6720                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6721                   CGF.EmitAutoVarCleanups(Emission);
6722                 }
6723               }
6724             }
6725             CondVal = CGF.EvaluateExprAsBool(Cond);
6726           }
6727         }
6728       }
6729       // Check the value of num_threads clause iff if clause was not specified
6730       // or is not evaluated to false.
6731       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6732         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6733         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6734         const auto *NumThreadsClause =
6735             Dir->getSingleClause<OMPNumThreadsClause>();
6736         CodeGenFunction::LexicalScope Scope(
6737             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6738         if (const auto *PreInit =
6739                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6740           for (const auto *I : PreInit->decls()) {
6741             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6742               CGF.EmitVarDecl(cast<VarDecl>(*I));
6743             } else {
6744               CodeGenFunction::AutoVarEmission Emission =
6745                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6746               CGF.EmitAutoVarCleanups(Emission);
6747             }
6748           }
6749         }
6750         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6751         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6752                                                /*isSigned=*/false);
6753         if (DefaultThreadLimitVal)
6754           NumThreads = CGF.Builder.CreateSelect(
6755               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6756               DefaultThreadLimitVal, NumThreads);
6757       } else {
6758         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6759                                            : CGF.Builder.getInt32(0);
6760       }
6761       // Process condition of the if clause.
6762       if (CondVal) {
6763         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6764                                               CGF.Builder.getInt32(1));
6765       }
6766       return NumThreads;
6767     }
6768     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6769       return CGF.Builder.getInt32(1);
6770     return DefaultThreadLimitVal;
6771   }
6772   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6773                                : CGF.Builder.getInt32(0);
6774 }
6775 
6776 /// Emit the number of threads for a target directive.  Inspect the
6777 /// thread_limit clause associated with a teams construct combined or closely
6778 /// nested with the target directive.
6779 ///
6780 /// Emit the num_threads clause for directives such as 'target parallel' that
6781 /// have no associated teams construct.
6782 ///
6783 /// Otherwise, return nullptr.
6784 static llvm::Value *
6785 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6786                                  const OMPExecutableDirective &D) {
6787   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6788          "Clauses associated with the teams directive expected to be emitted "
6789          "only for the host!");
6790   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6791   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6792          "Expected target-based executable directive.");
6793   CGBuilderTy &Bld = CGF.Builder;
6794   llvm::Value *ThreadLimitVal = nullptr;
6795   llvm::Value *NumThreadsVal = nullptr;
6796   switch (DirectiveKind) {
6797   case OMPD_target: {
6798     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6799     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6800       return NumThreads;
6801     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6802         CGF.getContext(), CS->getCapturedStmt());
6803     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6804       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6805         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6806         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6807         const auto *ThreadLimitClause =
6808             Dir->getSingleClause<OMPThreadLimitClause>();
6809         CodeGenFunction::LexicalScope Scope(
6810             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6811         if (const auto *PreInit =
6812                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6813           for (const auto *I : PreInit->decls()) {
6814             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6815               CGF.EmitVarDecl(cast<VarDecl>(*I));
6816             } else {
6817               CodeGenFunction::AutoVarEmission Emission =
6818                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6819               CGF.EmitAutoVarCleanups(Emission);
6820             }
6821           }
6822         }
6823         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6824             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6825         ThreadLimitVal =
6826             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6827       }
6828       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6829           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6830         CS = Dir->getInnermostCapturedStmt();
6831         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6832             CGF.getContext(), CS->getCapturedStmt());
6833         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6834       }
6835       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6836           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6837         CS = Dir->getInnermostCapturedStmt();
6838         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6839           return NumThreads;
6840       }
6841       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6842         return Bld.getInt32(1);
6843     }
6844     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6845   }
6846   case OMPD_target_teams: {
6847     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6848       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6849       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6850       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6851           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6852       ThreadLimitVal =
6853           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6854     }
6855     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6856     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6857       return NumThreads;
6858     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6859         CGF.getContext(), CS->getCapturedStmt());
6860     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6861       if (Dir->getDirectiveKind() == OMPD_distribute) {
6862         CS = Dir->getInnermostCapturedStmt();
6863         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6864           return NumThreads;
6865       }
6866     }
6867     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6868   }
6869   case OMPD_target_teams_distribute:
6870     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6871       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6872       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6873       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6874           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6875       ThreadLimitVal =
6876           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6877     }
6878     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6879   case OMPD_target_parallel:
6880   case OMPD_target_parallel_for:
6881   case OMPD_target_parallel_for_simd:
6882   case OMPD_target_teams_distribute_parallel_for:
6883   case OMPD_target_teams_distribute_parallel_for_simd: {
6884     llvm::Value *CondVal = nullptr;
6885     // Handle if clause. If if clause present, the number of threads is
6886     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6887     if (D.hasClausesOfKind<OMPIfClause>()) {
6888       const OMPIfClause *IfClause = nullptr;
6889       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6890         if (C->getNameModifier() == OMPD_unknown ||
6891             C->getNameModifier() == OMPD_parallel) {
6892           IfClause = C;
6893           break;
6894         }
6895       }
6896       if (IfClause) {
6897         const Expr *Cond = IfClause->getCondition();
6898         bool Result;
6899         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6900           if (!Result)
6901             return Bld.getInt32(1);
6902         } else {
6903           CodeGenFunction::RunCleanupsScope Scope(CGF);
6904           CondVal = CGF.EvaluateExprAsBool(Cond);
6905         }
6906       }
6907     }
6908     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6909       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6910       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6911       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6912           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6913       ThreadLimitVal =
6914           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6915     }
6916     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6917       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6918       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6919       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6920           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6921       NumThreadsVal =
6922           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6923       ThreadLimitVal = ThreadLimitVal
6924                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6925                                                                 ThreadLimitVal),
6926                                               NumThreadsVal, ThreadLimitVal)
6927                            : NumThreadsVal;
6928     }
6929     if (!ThreadLimitVal)
6930       ThreadLimitVal = Bld.getInt32(0);
6931     if (CondVal)
6932       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6933     return ThreadLimitVal;
6934   }
6935   case OMPD_target_teams_distribute_simd:
6936   case OMPD_target_simd:
6937     return Bld.getInt32(1);
6938   case OMPD_parallel:
6939   case OMPD_for:
6940   case OMPD_parallel_for:
6941   case OMPD_parallel_master:
6942   case OMPD_parallel_sections:
6943   case OMPD_for_simd:
6944   case OMPD_parallel_for_simd:
6945   case OMPD_cancel:
6946   case OMPD_cancellation_point:
6947   case OMPD_ordered:
6948   case OMPD_threadprivate:
6949   case OMPD_allocate:
6950   case OMPD_task:
6951   case OMPD_simd:
6952   case OMPD_tile:
6953   case OMPD_sections:
6954   case OMPD_section:
6955   case OMPD_single:
6956   case OMPD_master:
6957   case OMPD_critical:
6958   case OMPD_taskyield:
6959   case OMPD_barrier:
6960   case OMPD_taskwait:
6961   case OMPD_taskgroup:
6962   case OMPD_atomic:
6963   case OMPD_flush:
6964   case OMPD_depobj:
6965   case OMPD_scan:
6966   case OMPD_teams:
6967   case OMPD_target_data:
6968   case OMPD_target_exit_data:
6969   case OMPD_target_enter_data:
6970   case OMPD_distribute:
6971   case OMPD_distribute_simd:
6972   case OMPD_distribute_parallel_for:
6973   case OMPD_distribute_parallel_for_simd:
6974   case OMPD_teams_distribute:
6975   case OMPD_teams_distribute_simd:
6976   case OMPD_teams_distribute_parallel_for:
6977   case OMPD_teams_distribute_parallel_for_simd:
6978   case OMPD_target_update:
6979   case OMPD_declare_simd:
6980   case OMPD_declare_variant:
6981   case OMPD_begin_declare_variant:
6982   case OMPD_end_declare_variant:
6983   case OMPD_declare_target:
6984   case OMPD_end_declare_target:
6985   case OMPD_declare_reduction:
6986   case OMPD_declare_mapper:
6987   case OMPD_taskloop:
6988   case OMPD_taskloop_simd:
6989   case OMPD_master_taskloop:
6990   case OMPD_master_taskloop_simd:
6991   case OMPD_parallel_master_taskloop:
6992   case OMPD_parallel_master_taskloop_simd:
6993   case OMPD_requires:
6994   case OMPD_unknown:
6995     break;
6996   default:
6997     break;
6998   }
6999   llvm_unreachable("Unsupported directive kind.");
7000 }
7001 
7002 namespace {
7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7004 
7005 // Utility to handle information from clauses associated with a given
7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7007 // It provides a convenient interface to obtain the information and generate
7008 // code for that information.
7009 class MappableExprsHandler {
7010 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. Individual flags are combined with the bitwise operators
  /// enabled by LLVM_MARK_AS_BITMASK_ENUM below.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // NOTE: 0x800 is intentionally unused; it is reserved for compatibility
    // with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7058 
7059   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7060   static unsigned getFlagMemberOffset() {
7061     unsigned Offset = 0;
7062     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7063          Remain = Remain >> 1)
7064       Offset++;
7065     return Offset;
7066   }
7067 
7068   /// Class that holds debugging information for a data mapping to be passed to
7069   /// the runtime library.
7070   class MappingExprInfo {
7071     /// The variable declaration used for the data mapping.
7072     const ValueDecl *MapDecl = nullptr;
7073     /// The original expression used in the map clause, or null if there is
7074     /// none.
7075     const Expr *MapExpr = nullptr;
7076 
7077   public:
7078     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7079         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7080 
7081     const ValueDecl *getMapDecl() const { return MapDecl; }
7082     const Expr *getMapExpr() const { return MapExpr; }
7083   };
7084 
7085   /// Class that associates information with a base pointer to be passed to the
7086   /// runtime library.
7087   class BasePointerInfo {
7088     /// The base pointer.
7089     llvm::Value *Ptr = nullptr;
7090     /// The base declaration that refers to this device pointer, or null if
7091     /// there is none.
7092     const ValueDecl *DevPtrDecl = nullptr;
7093 
7094   public:
7095     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7096         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7097     llvm::Value *operator*() const { return Ptr; }
7098     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7099     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7100   };
7101 
  // Small-vector aliases for the per-entry arrays collected while processing
  // mappable clauses (see MapCombinedInfoTy, which aggregates them).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  // One array of values per entry.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7109 
7110   /// This structure contains combined information generated for mappable
7111   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7112   /// mappers, and non-contiguous information.
7113   struct MapCombinedInfoTy {
7114     struct StructNonContiguousInfo {
7115       bool IsNonContiguous = false;
7116       MapDimArrayTy Dims;
7117       MapNonContiguousArrayTy Offsets;
7118       MapNonContiguousArrayTy Counts;
7119       MapNonContiguousArrayTy Strides;
7120     };
7121     MapExprsArrayTy Exprs;
7122     MapBaseValuesArrayTy BasePointers;
7123     MapValuesArrayTy Pointers;
7124     MapValuesArrayTy Sizes;
7125     MapFlagsArrayTy Types;
7126     MapMappersArrayTy Mappers;
7127     StructNonContiguousInfo NonContigInfo;
7128 
7129     /// Append arrays in \a CurInfo.
7130     void append(MapCombinedInfoTy &CurInfo) {
7131       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7132       BasePointers.append(CurInfo.BasePointers.begin(),
7133                           CurInfo.BasePointers.end());
7134       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7135       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7136       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7137       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7138       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7139                                  CurInfo.NonContigInfo.Dims.end());
7140       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7141                                     CurInfo.NonContigInfo.Offsets.end());
7142       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7143                                    CurInfo.NonContigInfo.Counts.end());
7144       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7145                                     CurInfo.NonContigInfo.Strides.end());
7146     }
7147   };
7148 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // NOTE(review): presumably map entries collected for the struct before
    // its final range is known -- confirm against the code that fills it.
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped element (LE); the address starts out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element (HE); the address starts out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // NOTE(review): Base and LB look like the struct's base address and a
    // lower-bound address; both start out invalid -- confirm at use sites.
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7164 
7165 private:
  /// Information recorded for one mappable expression: its component list,
  /// the map type, the map/motion modifiers, whether a device pointer has to
  /// be returned, whether the mapping is implicit, and the optional
  /// user-defined mapper and variable reference.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Non-owning views; the referenced modifier arrays are stored elsewhere.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Initializes every field from the argument of the same name.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7192 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // NOTE(review): presumably the clause expression associated with VD --
    // confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    // The declaration whose entry is deferred.
    const ValueDecl *VD = nullptr;
    // NOTE(review): presumably true for use_device_addr and false for
    // use_device_ptr -- confirm against the code that creates these entries.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7205 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7208   llvm::PointerUnion<const OMPExecutableDirective *,
7209                      const OMPDeclareMapperDecl *>
7210       CurDir;
7211 
7212   /// Function the directive is being generated for.
7213   CodeGenFunction &CGF;
7214 
7215   /// Set of all first private variables in the current directive.
7216   /// bool data is set to true if the variable is implicitly marked as
7217   /// firstprivate, false otherwise.
7218   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7219 
7220   /// Map between device pointer declarations and their expression components.
7221   /// The key value for declarations in 'this' is null.
7222   llvm::DenseMap<
7223       const ValueDecl *,
7224       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7225       DevPointersMap;
7226 
7227   llvm::Value *getExprTypeSize(const Expr *E) const {
7228     QualType ExprTy = E->getType().getCanonicalType();
7229 
7230     // Calculate the size for array shaping expression.
7231     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7232       llvm::Value *Size =
7233           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7234       for (const Expr *SE : OAE->getDimensions()) {
7235         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7236         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7237                                       CGF.getContext().getSizeType(),
7238                                       SE->getExprLoc());
7239         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7240       }
7241       return Size;
7242     }
7243 
7244     // Reference types are ignored for mapping purposes.
7245     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7246       ExprTy = RefTy->getPointeeType().getCanonicalType();
7247 
7248     // Given that an array section is considered a built-in type, we need to
7249     // do the calculation based on the length of the section instead of relying
7250     // on CGF.getTypeSize(E->getType()).
7251     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7252       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7253                             OAE->getBase()->IgnoreParenImpCasts())
7254                             .getCanonicalType();
7255 
7256       // If there is no length associated with the expression and lower bound is
7257       // not specified too, that means we are using the whole length of the
7258       // base.
7259       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7260           !OAE->getLowerBound())
7261         return CGF.getTypeSize(BaseTy);
7262 
7263       llvm::Value *ElemSize;
7264       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7265         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7266       } else {
7267         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7268         assert(ATy && "Expecting array type if not a pointer type.");
7269         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7270       }
7271 
7272       // If we don't have a length at this point, that is because we have an
7273       // array section with a single element.
7274       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7275         return ElemSize;
7276 
7277       if (const Expr *LenExpr = OAE->getLength()) {
7278         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7279         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7280                                              CGF.getContext().getSizeType(),
7281                                              LenExpr->getExprLoc());
7282         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7283       }
7284       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7285              OAE->getLowerBound() && "expected array_section[lb:].");
7286       // Size = sizetype - lb * elemtype;
7287       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7288       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7289       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7290                                        CGF.getContext().getSizeType(),
7291                                        OAE->getLowerBound()->getExprLoc());
7292       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7293       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7294       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7295       LengthVal = CGF.Builder.CreateSelect(
7296           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7297       return LengthVal;
7298     }
7299     return CGF.getTypeSize(ExprTy);
7300   }
7301 
7302   /// Return the corresponding bits for a given map clause modifier. Add
7303   /// a flag marking the map as a pointer if requested. Add a flag marking the
7304   /// map as the first one of a series of maps that relate to the same map
7305   /// expression.
7306   OpenMPOffloadMappingFlags getMapTypeBits(
7307       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7308       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7309       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7310     OpenMPOffloadMappingFlags Bits =
7311         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7312     switch (MapType) {
7313     case OMPC_MAP_alloc:
7314     case OMPC_MAP_release:
7315       // alloc and release is the default behavior in the runtime library,  i.e.
7316       // if we don't pass any bits alloc/release that is what the runtime is
7317       // going to do. Therefore, we don't need to signal anything for these two
7318       // type modifiers.
7319       break;
7320     case OMPC_MAP_to:
7321       Bits |= OMP_MAP_TO;
7322       break;
7323     case OMPC_MAP_from:
7324       Bits |= OMP_MAP_FROM;
7325       break;
7326     case OMPC_MAP_tofrom:
7327       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7328       break;
7329     case OMPC_MAP_delete:
7330       Bits |= OMP_MAP_DELETE;
7331       break;
7332     case OMPC_MAP_unknown:
7333       llvm_unreachable("Unexpected map type!");
7334     }
7335     if (AddPtrFlag)
7336       Bits |= OMP_MAP_PTR_AND_OBJ;
7337     if (AddIsTargetParamFlag)
7338       Bits |= OMP_MAP_TARGET_PARAM;
7339     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7340         != MapModifiers.end())
7341       Bits |= OMP_MAP_ALWAYS;
7342     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7343         != MapModifiers.end())
7344       Bits |= OMP_MAP_CLOSE;
7345     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7346             MapModifiers.end() ||
7347         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7348             MotionModifiers.end())
7349       Bits |= OMP_MAP_PRESENT;
7350     if (IsNonContiguous)
7351       Bits |= OMP_MAP_NON_CONTIG;
7352     return Bits;
7353   }
7354 
7355   /// Return true if the provided expression is a final array section. A
7356   /// final array section, is one whose length can't be proved to be one.
7357   bool isFinalArraySectionExpression(const Expr *E) const {
7358     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7359 
7360     // It is not an array section and therefore not a unity-size one.
7361     if (!OASE)
7362       return false;
7363 
7364     // An array section with no colon always refer to a single element.
7365     if (OASE->getColonLocFirst().isInvalid())
7366       return false;
7367 
7368     const Expr *Length = OASE->getLength();
7369 
7370     // If we don't have a length we have to check if the array has size 1
7371     // for this dimension. Also, we should always expect a length if the
7372     // base type is pointer.
7373     if (!Length) {
7374       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7375                              OASE->getBase()->IgnoreParenImpCasts())
7376                              .getCanonicalType();
7377       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7378         return ATy->getSize().getSExtValue() != 1;
7379       // If we don't have a constant dimension length, we have to consider
7380       // the current section as having any size, so it is not necessarily
7381       // unitary. If it happen to be unity size, that's user fault.
7382       return true;
7383     }
7384 
7385     // Check if the length evaluates to 1.
7386     Expr::EvalResult Result;
7387     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7388       return true; // Can have more that size 1.
7389 
7390     llvm::APSInt ConstLength = Result.Val.getInt();
7391     return ConstLength.getSExtValue() != 1;
7392   }
7393 
7394   /// Generate the base pointers, section pointers, sizes, map type bits, and
7395   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7396   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7399   void generateInfoForComponentList(
7400       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7401       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7402       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7403       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7404       bool IsFirstComponentList, bool IsImplicit,
7405       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7406       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7407       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7408           OverlappedElements = llvm::None) const {
7409     // The following summarizes what has to be generated for each map and the
7410     // types below. The generated information is expressed in this order:
7411     // base pointer, section pointer, size, flags
7412     // (to add to the ones that come from the map type and modifier).
7413     //
7414     // double d;
7415     // int i[100];
7416     // float *p;
7417     //
7418     // struct S1 {
7419     //   int i;
7420     //   float f[50];
7421     // }
7422     // struct S2 {
7423     //   int i;
7424     //   float f[50];
7425     //   S1 s;
7426     //   double *p;
7427     //   struct S2 *ps;
7428     // }
7429     // S2 s;
7430     // S2 *ps;
7431     //
7432     // map(d)
7433     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7434     //
7435     // map(i)
7436     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7437     //
7438     // map(i[1:23])
7439     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7440     //
7441     // map(p)
7442     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7443     //
7444     // map(p[1:24])
7445     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7446     // in unified shared memory mode or for local pointers
7447     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7448     //
7449     // map(s)
7450     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7451     //
7452     // map(s.i)
7453     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7454     //
7455     // map(s.s.f)
7456     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7457     //
7458     // map(s.p)
7459     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7460     //
7461     // map(to: s.p[:22])
7462     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7463     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7464     // &(s.p), &(s.p[0]), 22*sizeof(double),
7465     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7466     // (*) alloc space for struct members, only this is a target parameter
7467     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7468     //      optimizes this entry out, same in the examples below)
7469     // (***) map the pointee (map: to)
7470     //
7471     // map(s.ps)
7472     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7473     //
7474     // map(from: s.ps->s.i)
7475     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7476     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7477     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7478     //
7479     // map(to: s.ps->ps)
7480     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7481     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7482     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7483     //
7484     // map(s.ps->ps->ps)
7485     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7486     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7487     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7488     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7489     //
7490     // map(to: s.ps->ps->s.f[:22])
7491     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7492     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7493     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7494     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7495     //
7496     // map(ps)
7497     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7498     //
7499     // map(ps->i)
7500     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7501     //
7502     // map(ps->s.f)
7503     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7504     //
7505     // map(from: ps->p)
7506     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7507     //
7508     // map(to: ps->p[:22])
7509     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7510     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7511     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7512     //
7513     // map(ps->ps)
7514     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7515     //
7516     // map(from: ps->ps->s.i)
7517     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7518     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7519     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7520     //
7521     // map(from: ps->ps->ps)
7522     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7523     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7524     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7525     //
7526     // map(ps->ps->ps->ps)
7527     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7528     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7529     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7530     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7531     //
7532     // map(to: ps->ps->ps->s.f[:22])
7533     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7534     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7535     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7536     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7537     //
7538     // map(to: s.f[:22]) map(from: s.p[:33])
7539     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7541     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7542     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7543     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7544     // (*) allocate contiguous space needed to fit all mapped members even if
7545     //     we allocate space for members not mapped (in this example,
7546     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7547     //     them as well because they fall between &s.f[0] and &s.p)
7548     //
7549     // map(from: s.f[:22]) map(to: ps->p[:33])
7550     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7551     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7552     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7553     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7554     // (*) the struct this entry pertains to is the 2nd element in the list of
7555     //     arguments, hence MEMBER_OF(2)
7556     //
7557     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7558     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7559     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7560     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7561     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7562     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7563     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7564     // (*) the struct this entry pertains to is the 4th element in the list
7565     //     of arguments, hence MEMBER_OF(4)
7566 
7567     // Track if the map information being generated is the first for a capture.
7568     bool IsCaptureFirstInfo = IsFirstComponentList;
7569     // When the variable is on a declare target link or in a to clause with
7570     // unified memory, a reference is needed to hold the host/device address
7571     // of the variable.
7572     bool RequiresReference = false;
7573 
7574     // Scan the components from the base to the complete expression.
7575     auto CI = Components.rbegin();
7576     auto CE = Components.rend();
7577     auto I = CI;
7578 
7579     // Track if the map information being generated is the first for a list of
7580     // components.
7581     bool IsExpressionFirstInfo = true;
7582     bool FirstPointerInComplexData = false;
7583     Address BP = Address::invalid();
7584     const Expr *AssocExpr = I->getAssociatedExpression();
7585     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7586     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7587     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7588 
7589     if (isa<MemberExpr>(AssocExpr)) {
7590       // The base is the 'this' pointer. The content of the pointer is going
7591       // to be the base of the field being mapped.
7592       BP = CGF.LoadCXXThisAddress();
7593     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7594                (OASE &&
7595                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7596       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7597     } else if (OAShE &&
7598                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7599       BP = Address(
7600           CGF.EmitScalarExpr(OAShE->getBase()),
7601           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7602     } else {
7603       // The base is the reference to the variable.
7604       // BP = &Var.
7605       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7606       if (const auto *VD =
7607               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7608         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7609                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7610           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7611               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7612                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7613             RequiresReference = true;
7614             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7615           }
7616         }
7617       }
7618 
7619       // If the variable is a pointer and is being dereferenced (i.e. is not
7620       // the last component), the base has to be the pointer itself, not its
7621       // reference. References are ignored for mapping purposes.
7622       QualType Ty =
7623           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7624       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7625         // No need to generate individual map information for the pointer, it
7626         // can be associated with the combined storage if shared memory mode is
7627         // active or the base declaration is not global variable.
7628         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7629         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7630             !VD || VD->hasLocalStorage())
7631           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7632         else
7633           FirstPointerInComplexData = true;
7634         ++I;
7635       }
7636     }
7637 
7638     // Track whether a component of the list should be marked as MEMBER_OF some
7639     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7640     // in a component list should be marked as MEMBER_OF, all subsequent entries
7641     // do not belong to the base struct. E.g.
7642     // struct S2 s;
7643     // s.ps->ps->ps->f[:]
7644     //   (1) (2) (3) (4)
7645     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7646     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7647     // is the pointee of ps(2) which is not member of struct s, so it should not
7648     // be marked as such (it is still PTR_AND_OBJ).
7649     // The variable is initialized to false so that PTR_AND_OBJ entries which
7650     // are not struct members are not considered (e.g. array of pointers to
7651     // data).
7652     bool ShouldBeMemberOf = false;
7653 
7654     // Variable keeping track of whether or not we have encountered a component
7655     // in the component list which is a member expression. Useful when we have a
7656     // pointer or a final array section, in which case it is the previous
7657     // component in the list which tells us whether we have a member expression.
7658     // E.g. X.f[:]
7659     // While processing the final array section "[:]" it is "f" which tells us
7660     // whether we are dealing with a member of a declared struct.
7661     const MemberExpr *EncounteredME = nullptr;
7662 
7663     // Track for the total number of dimension. Start from one for the dummy
7664     // dimension.
7665     uint64_t DimSize = 1;
7666 
7667     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7668 
7669     for (; I != CE; ++I) {
7670       // If the current component is member of a struct (parent struct) mark it.
7671       if (!EncounteredME) {
7672         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7673         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7674         // as MEMBER_OF the parent struct.
7675         if (EncounteredME) {
7676           ShouldBeMemberOf = true;
7677           // Do not emit as complex pointer if this is actually not array-like
7678           // expression.
7679           if (FirstPointerInComplexData) {
7680             QualType Ty = std::prev(I)
7681                               ->getAssociatedDeclaration()
7682                               ->getType()
7683                               .getNonReferenceType();
7684             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7685             FirstPointerInComplexData = false;
7686           }
7687         }
7688       }
7689 
7690       auto Next = std::next(I);
7691 
7692       // We need to generate the addresses and sizes if this is the last
7693       // component, if the component is a pointer or if it is an array section
7694       // whose length can't be proved to be one. If this is a pointer, it
7695       // becomes the base address for the following components.
7696 
7697       // A final array section, is one whose length can't be proved to be one.
7698       // If the map item is non-contiguous then we don't treat any array section
7699       // as final array section.
7700       bool IsFinalArraySection =
7701           !IsNonContiguous &&
7702           isFinalArraySectionExpression(I->getAssociatedExpression());
7703 
7704       // If we have a declaration for the mapping use that, otherwise use
7705       // the base declaration of the map clause.
7706       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7707                                      ? I->getAssociatedDeclaration()
7708                                      : BaseDecl;
7709 
7710       // Get information on whether the element is a pointer. Have to do a
7711       // special treatment for array sections given that they are built-in
7712       // types.
7713       const auto *OASE =
7714           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7715       const auto *OAShE =
7716           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7717       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7718       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7719       bool IsPointer =
7720           OAShE ||
7721           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7722                        .getCanonicalType()
7723                        ->isAnyPointerType()) ||
7724           I->getAssociatedExpression()->getType()->isAnyPointerType();
7725       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7726 
7727       if (OASE)
7728         ++DimSize;
7729 
7730       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7731         // If this is not the last component, we expect the pointer to be
7732         // associated with an array expression or member expression.
7733         assert((Next == CE ||
7734                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7735                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7736                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7737                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7738                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7739                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7740                "Unexpected expression");
7741 
7742         Address LB = Address::invalid();
7743         if (OAShE) {
7744           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7745                        CGF.getContext().getTypeAlignInChars(
7746                            OAShE->getBase()->getType()));
7747         } else {
7748           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7749                    .getAddress(CGF);
7750         }
7751 
7752         // If this component is a pointer inside the base struct then we don't
7753         // need to create any entry for it - it will be combined with the object
7754         // it is pointing to into a single PTR_AND_OBJ entry.
7755         bool IsMemberPointerOrAddr =
7756             (IsPointer || ForDeviceAddr) && EncounteredME &&
7757             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7758              EncounteredME);
7759         if (!OverlappedElements.empty() && Next == CE) {
7760           // Handle base element with the info for overlapped elements.
7761           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7762           assert(!IsPointer &&
7763                  "Unexpected base element with the pointer type.");
7764           // Mark the whole struct as the struct that requires allocation on the
7765           // device.
7766           PartialStruct.LowestElem = {0, LB};
7767           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7768               I->getAssociatedExpression()->getType());
7769           Address HB = CGF.Builder.CreateConstGEP(
7770               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7771                                                               CGF.VoidPtrTy),
7772               TypeSize.getQuantity() - 1);
7773           PartialStruct.HighestElem = {
7774               std::numeric_limits<decltype(
7775                   PartialStruct.HighestElem.first)>::max(),
7776               HB};
7777           PartialStruct.Base = BP;
7778           PartialStruct.LB = LB;
7779           assert(
7780               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7781               "Overlapped elements must be used only once for the variable.");
7782           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7783           // Emit data for non-overlapped data.
7784           OpenMPOffloadMappingFlags Flags =
7785               OMP_MAP_MEMBER_OF |
7786               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7787                              /*AddPtrFlag=*/false,
7788                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7789           llvm::Value *Size = nullptr;
7790           // Do bitcopy of all non-overlapped structure elements.
7791           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7792                    Component : OverlappedElements) {
7793             Address ComponentLB = Address::invalid();
7794             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7795                  Component) {
7796               if (MC.getAssociatedDeclaration()) {
7797                 ComponentLB =
7798                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7799                         .getAddress(CGF);
7800                 Size = CGF.Builder.CreatePtrDiff(
7801                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7802                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7803                 break;
7804               }
7805             }
7806             assert(Size && "Failed to determine structure size");
7807             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7808             CombinedInfo.BasePointers.push_back(BP.getPointer());
7809             CombinedInfo.Pointers.push_back(LB.getPointer());
7810             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7811                 Size, CGF.Int64Ty, /*isSigned=*/true));
7812             CombinedInfo.Types.push_back(Flags);
7813             CombinedInfo.Mappers.push_back(nullptr);
7814             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7815                                                                       : 1);
7816             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7817           }
7818           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7819           CombinedInfo.BasePointers.push_back(BP.getPointer());
7820           CombinedInfo.Pointers.push_back(LB.getPointer());
7821           Size = CGF.Builder.CreatePtrDiff(
7822               CGF.EmitCastToVoidPtr(
7823                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7824               CGF.EmitCastToVoidPtr(LB.getPointer()));
7825           CombinedInfo.Sizes.push_back(
7826               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7827           CombinedInfo.Types.push_back(Flags);
7828           CombinedInfo.Mappers.push_back(nullptr);
7829           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7830                                                                     : 1);
7831           break;
7832         }
7833         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7834         if (!IsMemberPointerOrAddr ||
7835             (Next == CE && MapType != OMPC_MAP_unknown)) {
7836           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7837           CombinedInfo.BasePointers.push_back(BP.getPointer());
7838           CombinedInfo.Pointers.push_back(LB.getPointer());
7839           CombinedInfo.Sizes.push_back(
7840               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7841           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7842                                                                     : 1);
7843 
7844           // If Mapper is valid, the last component inherits the mapper.
7845           bool HasMapper = Mapper && Next == CE;
7846           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7847 
7848           // We need to add a pointer flag for each map that comes from the
7849           // same expression except for the first one. We also need to signal
7850           // this map is the first one that relates with the current capture
7851           // (there is a set of entries for each capture).
7852           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7853               MapType, MapModifiers, MotionModifiers, IsImplicit,
7854               !IsExpressionFirstInfo || RequiresReference ||
7855                   FirstPointerInComplexData,
7856               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7857 
7858           if (!IsExpressionFirstInfo) {
7859             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7860             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7861             if (IsPointer)
7862               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7863                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7864 
7865             if (ShouldBeMemberOf) {
7866               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7867               // should be later updated with the correct value of MEMBER_OF.
7868               Flags |= OMP_MAP_MEMBER_OF;
7869               // From now on, all subsequent PTR_AND_OBJ entries should not be
7870               // marked as MEMBER_OF.
7871               ShouldBeMemberOf = false;
7872             }
7873           }
7874 
7875           CombinedInfo.Types.push_back(Flags);
7876         }
7877 
7878         // If we have encountered a member expression so far, keep track of the
7879         // mapped member. If the parent is "*this", then the value declaration
7880         // is nullptr.
7881         if (EncounteredME) {
7882           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7883           unsigned FieldIndex = FD->getFieldIndex();
7884 
7885           // Update info about the lowest and highest elements for this struct
7886           if (!PartialStruct.Base.isValid()) {
7887             PartialStruct.LowestElem = {FieldIndex, LB};
7888             if (IsFinalArraySection) {
7889               Address HB =
7890                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7891                       .getAddress(CGF);
7892               PartialStruct.HighestElem = {FieldIndex, HB};
7893             } else {
7894               PartialStruct.HighestElem = {FieldIndex, LB};
7895             }
7896             PartialStruct.Base = BP;
7897             PartialStruct.LB = BP;
7898           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7899             PartialStruct.LowestElem = {FieldIndex, LB};
7900           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7901             PartialStruct.HighestElem = {FieldIndex, LB};
7902           }
7903         }
7904 
7905         // Need to emit combined struct for array sections.
7906         if (IsFinalArraySection || IsNonContiguous)
7907           PartialStruct.IsArraySection = true;
7908 
7909         // If we have a final array section, we are done with this expression.
7910         if (IsFinalArraySection)
7911           break;
7912 
7913         // The pointer becomes the base for the next element.
7914         if (Next != CE)
7915           BP = LB;
7916 
7917         IsExpressionFirstInfo = false;
7918         IsCaptureFirstInfo = false;
7919         FirstPointerInComplexData = false;
7920       } else if (FirstPointerInComplexData) {
7921         QualType Ty = Components.rbegin()
7922                           ->getAssociatedDeclaration()
7923                           ->getType()
7924                           .getNonReferenceType();
7925         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7926         FirstPointerInComplexData = false;
7927       }
7928     }
7929     // If ran into the whole component - allocate the space for the whole
7930     // record.
7931     if (!EncounteredME)
7932       PartialStruct.HasCompleteRecord = true;
7933 
7934     if (!IsNonContiguous)
7935       return;
7936 
7937     const ASTContext &Context = CGF.getContext();
7938 
7939     // For supporting stride in array section, we need to initialize the first
7940     // dimension size as 1, first offset as 0, and first count as 1
7941     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7942     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7943     MapValuesArrayTy CurStrides;
7944     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7945     uint64_t ElementTypeSize;
7946 
7947     // Collect Size information for each dimension and get the element size as
7948     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7950     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7951          Components) {
7952       const Expr *AssocExpr = Component.getAssociatedExpression();
7953       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7954 
7955       if (!OASE)
7956         continue;
7957 
7958       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7959       auto *CAT = Context.getAsConstantArrayType(Ty);
7960       auto *VAT = Context.getAsVariableArrayType(Ty);
7961 
7962       // We need all the dimension size except for the last dimension.
7963       assert((VAT || CAT || &Component == &*Components.begin()) &&
7964              "Should be either ConstantArray or VariableArray if not the "
7965              "first Component");
7966 
7967       // Get element size if CurStrides is empty.
7968       if (CurStrides.empty()) {
7969         const Type *ElementType = nullptr;
7970         if (CAT)
7971           ElementType = CAT->getElementType().getTypePtr();
7972         else if (VAT)
7973           ElementType = VAT->getElementType().getTypePtr();
7974         else
7975           assert(&Component == &*Components.begin() &&
7976                  "Only expect pointer (non CAT or VAT) when this is the "
7977                  "first Component");
7978         // If ElementType is null, then it means the base is a pointer
7979         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7980         // for next iteration.
7981         if (ElementType) {
7982           // For the case that having pointer as base, we need to remove one
7983           // level of indirection.
7984           if (&Component != &*Components.begin())
7985             ElementType = ElementType->getPointeeOrArrayElementType();
7986           ElementTypeSize =
7987               Context.getTypeSizeInChars(ElementType).getQuantity();
7988           CurStrides.push_back(
7989               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7990         }
7991       }
7992       // Get dimension value except for the last dimension since we don't need
7993       // it.
7994       if (DimSizes.size() < Components.size() - 1) {
7995         if (CAT)
7996           DimSizes.push_back(llvm::ConstantInt::get(
7997               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7998         else if (VAT)
7999           DimSizes.push_back(CGF.Builder.CreateIntCast(
8000               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8001               /*IsSigned=*/false));
8002       }
8003     }
8004 
    // Skip the dummy dimension since we already have its information.
8006     auto DI = DimSizes.begin() + 1;
8007     // Product of dimension.
8008     llvm::Value *DimProd =
8009         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8010 
8011     // Collect info for non-contiguous. Notice that offset, count, and stride
8012     // are only meaningful for array-section, so we insert a null for anything
8013     // other than array-section.
8014     // Also, the size of offset, count, and stride are not the same as
8015     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8016     // count, and stride are the same as the number of non-contiguous
8017     // declaration in target update to/from clause.
8018     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8019          Components) {
8020       const Expr *AssocExpr = Component.getAssociatedExpression();
8021 
8022       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8023         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8024             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8025             /*isSigned=*/false);
8026         CurOffsets.push_back(Offset);
8027         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8028         CurStrides.push_back(CurStrides.back());
8029         continue;
8030       }
8031 
8032       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8033 
8034       if (!OASE)
8035         continue;
8036 
8037       // Offset
8038       const Expr *OffsetExpr = OASE->getLowerBound();
8039       llvm::Value *Offset = nullptr;
8040       if (!OffsetExpr) {
8041         // If offset is absent, then we just set it to zero.
8042         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8043       } else {
8044         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8045                                            CGF.Int64Ty,
8046                                            /*isSigned=*/false);
8047       }
8048       CurOffsets.push_back(Offset);
8049 
8050       // Count
8051       const Expr *CountExpr = OASE->getLength();
8052       llvm::Value *Count = nullptr;
8053       if (!CountExpr) {
8054         // In Clang, once a high dimension is an array section, we construct all
8055         // the lower dimension as array section, however, for case like
8056         // arr[0:2][2], Clang construct the inner dimension as an array section
8057         // but it actually is not in an array section form according to spec.
8058         if (!OASE->getColonLocFirst().isValid() &&
8059             !OASE->getColonLocSecond().isValid()) {
8060           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8061         } else {
8062           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8063           // When the length is absent it defaults to ⌈(size −
8064           // lower-bound)/stride⌉, where size is the size of the array
8065           // dimension.
8066           const Expr *StrideExpr = OASE->getStride();
8067           llvm::Value *Stride =
8068               StrideExpr
8069                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8070                                               CGF.Int64Ty, /*isSigned=*/false)
8071                   : nullptr;
8072           if (Stride)
8073             Count = CGF.Builder.CreateUDiv(
8074                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8075           else
8076             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8077         }
8078       } else {
8079         Count = CGF.EmitScalarExpr(CountExpr);
8080       }
8081       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8082       CurCounts.push_back(Count);
8083 
8084       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8085       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8086       //              Offset      Count     Stride
8087       //    D0          0           1         4    (int)    <- dummy dimension
8088       //    D1          0           2         8    (2 * (1) * 4)
8089       //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
8091       const Expr *StrideExpr = OASE->getStride();
8092       llvm::Value *Stride =
8093           StrideExpr
8094               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8095                                           CGF.Int64Ty, /*isSigned=*/false)
8096               : nullptr;
8097       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8098       if (Stride)
8099         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8100       else
8101         CurStrides.push_back(DimProd);
8102       if (DI != DimSizes.end())
8103         ++DI;
8104     }
8105 
8106     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8107     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8108     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8109   }
8110 
8111   /// Return the adjusted map modifiers if the declaration a capture refers to
8112   /// appears in a first-private clause. This is expected to be used only with
8113   /// directives that start with 'target'.
8114   MappableExprsHandler::OpenMPOffloadMappingFlags
8115   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8116     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8117 
8118     // A first private variable captured by reference will use only the
8119     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8120     // declaration is known as first-private in this handler.
8121     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8122       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8123           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8124         return MappableExprsHandler::OMP_MAP_ALWAYS |
8125                MappableExprsHandler::OMP_MAP_TO;
8126       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8127         return MappableExprsHandler::OMP_MAP_TO |
8128                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8129       return MappableExprsHandler::OMP_MAP_PRIVATE |
8130              MappableExprsHandler::OMP_MAP_TO;
8131     }
8132     return MappableExprsHandler::OMP_MAP_TO |
8133            MappableExprsHandler::OMP_MAP_FROM;
8134   }
8135 
8136   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8137     // Rotate by getFlagMemberOffset() bits.
8138     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8139                                                   << getFlagMemberOffset());
8140   }
8141 
8142   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8143                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8144     // If the entry is PTR_AND_OBJ but has not been marked with the special
8145     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8146     // marked as MEMBER_OF.
8147     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8148         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8149       return;
8150 
8151     // Reset the placeholder value to prepare the flag for the assignment of the
8152     // proper MEMBER_OF value.
8153     Flags &= ~OMP_MAP_MEMBER_OF;
8154     Flags |= MemberOfFlag;
8155   }
8156 
8157   void getPlainLayout(const CXXRecordDecl *RD,
8158                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8159                       bool AsBase) const {
8160     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8161 
8162     llvm::StructType *St =
8163         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8164 
8165     unsigned NumElements = St->getNumElements();
8166     llvm::SmallVector<
8167         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8168         RecordLayout(NumElements);
8169 
8170     // Fill bases.
8171     for (const auto &I : RD->bases()) {
8172       if (I.isVirtual())
8173         continue;
8174       const auto *Base = I.getType()->getAsCXXRecordDecl();
8175       // Ignore empty bases.
8176       if (Base->isEmpty() || CGF.getContext()
8177                                  .getASTRecordLayout(Base)
8178                                  .getNonVirtualSize()
8179                                  .isZero())
8180         continue;
8181 
8182       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8183       RecordLayout[FieldIndex] = Base;
8184     }
8185     // Fill in virtual bases.
8186     for (const auto &I : RD->vbases()) {
8187       const auto *Base = I.getType()->getAsCXXRecordDecl();
8188       // Ignore empty bases.
8189       if (Base->isEmpty())
8190         continue;
8191       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8192       if (RecordLayout[FieldIndex])
8193         continue;
8194       RecordLayout[FieldIndex] = Base;
8195     }
8196     // Fill in all the fields.
8197     assert(!RD->isUnion() && "Unexpected union.");
8198     for (const auto *Field : RD->fields()) {
8199       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8200       // will fill in later.)
8201       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8202         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8203         RecordLayout[FieldIndex] = Field;
8204       }
8205     }
8206     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8207              &Data : RecordLayout) {
8208       if (Data.isNull())
8209         continue;
8210       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8211         getPlainLayout(Base, Layout, /*AsBase=*/true);
8212       else
8213         Layout.push_back(Data.get<const FieldDecl *>());
8214     }
8215   }
8216 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// Only map, to, from, use_device_ptr and use_device_addr clauses in
  /// \p Clauses are inspected; every other clause kind is skipped.
  ///
  /// \param Clauses Clauses to scan for mappable expressions.
  /// \param CombinedInfo Output; the generated entries are appended to it.
  /// \param SkipVarSet Declarations whose component lists are not recorded by
  /// the InfoGen helper below. Defaults to an empty set.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Each declaration owns one bucket per MapKind; 'Total' is not a real kind,
    // it is only used as the number of buckets.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses. It does nothing for declarations in SkipVarSet; otherwise it
    // creates the per-declaration buckets on first use and appends a new
    // MapInfo to the bucket selected by Kind.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Map clauses. A clause with the 'present' modifier goes to the Present
    // bucket, otherwise a clause with map type 'alloc' goes to the Allocs
    // bucket, and everything else goes to Other.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      // The variable references are walked in lockstep with the component
      // lists.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The variable reference is only forwarded when the clause has a valid
        // map location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // 'to' clauses: recorded with map type 'to' and the clause's motion
    // modifiers; the 'present' motion modifier selects the Present bucket.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // 'from' clauses: same handling as 'to' clauses, but recorded with map
    // type 'from'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Entries created here for use_device_ptr items are collected separately
    // and appended to CombinedInfo at the very end of this function.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              // PrevCI is the component that follows the last one when the
              // list is walked from the back; it equals rend() when the list
              // holds a single component.
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Non-member item: load the pointer value and emit a zero-size
          // RETURN_PARAM entry whose base pointer and pointer are both that
          // value.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    // 'Processed' makes sure each declaration is handled only once across all
    // use_device_addr clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Non-member item: use the address of the expression when it is a
          // glvalue, otherwise its scalar value. Unlike the use_device_ptr
          // case, the zero-size RETURN_PARAM entry goes straight into
          // CombinedInfo.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the entries for every recorded declaration. For each declaration
    // the buckets are visited in index order (Present, Allocs, Other).
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      // VD is null for the entries that were recorded under the null key.
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            // use_device_addr: base pointer and pointer are the same value.
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base pointer is the address of the member,
            // the pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      // The combined entry is pushed onto CombinedInfo before CurInfo is
      // appended below, so it precedes the entries of this declaration.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8570 
8571 public:
8572   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8573       : CurDir(&Dir), CGF(CGF) {
8574     // Extract firstprivate clause information.
8575     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8576       for (const auto *D : C->varlists())
8577         FirstPrivateDecls.try_emplace(
8578             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8579     // Extract implicit firstprivates from uses_allocators clauses.
8580     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8581       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8582         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8583         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8584           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8585                                         /*Implicit=*/true);
8586         else if (const auto *VD = dyn_cast<VarDecl>(
8587                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8588                          ->getDecl()))
8589           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8590       }
8591     }
8592     // Extract device pointer clause information.
8593     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8594       for (auto L : C->component_lists())
8595         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8596   }
8597 
  /// Constructor for the declare mapper directive.
  ///
  /// Only stores \p Dir as the current directive and \p CGF as the
  /// code-generation function; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8601 
8602   /// Generate code for the combined entry if we have a partially mapped struct
8603   /// and take care of the mapping flags of the arguments corresponding to
8604   /// individual struct members.
8605   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8606                          MapFlagsArrayTy &CurTypes,
8607                          const StructRangeInfoTy &PartialStruct,
8608                          const ValueDecl *VD = nullptr,
8609                          bool NotTargetParams = true) const {
8610     if (CurTypes.size() == 1 &&
8611         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8612         !PartialStruct.IsArraySection)
8613       return;
8614     Address LBAddr = PartialStruct.LowestElem.second;
8615     Address HBAddr = PartialStruct.HighestElem.second;
8616     if (PartialStruct.HasCompleteRecord) {
8617       LBAddr = PartialStruct.LB;
8618       HBAddr = PartialStruct.LB;
8619     }
8620     CombinedInfo.Exprs.push_back(VD);
8621     // Base is the base of the struct
8622     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8623     // Pointer is the address of the lowest element
8624     llvm::Value *LB = LBAddr.getPointer();
8625     CombinedInfo.Pointers.push_back(LB);
8626     // There should not be a mapper for a combined entry.
8627     CombinedInfo.Mappers.push_back(nullptr);
8628     // Size is (addr of {highest+1} element) - (addr of lowest element)
8629     llvm::Value *HB = HBAddr.getPointer();
8630     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8631     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8632     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8633     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8634     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8635                                                   /*isSigned=*/false);
8636     CombinedInfo.Sizes.push_back(Size);
8637     // Map type is always TARGET_PARAM, if generate info for captures.
8638     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8639                                                  : OMP_MAP_TARGET_PARAM);
8640     // If any element has the present modifier, then make sure the runtime
8641     // doesn't attempt to allocate the struct.
8642     if (CurTypes.end() !=
8643         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8644           return Type & OMP_MAP_PRESENT;
8645         }))
8646       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8647     // Remove TARGET_PARAM flag from the first element
8648     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8649 
8650     // All other current entries will be MEMBER_OF the combined entry
8651     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8652     // 0xFFFF in the MEMBER_OF field).
8653     OpenMPOffloadMappingFlags MemberOfFlag =
8654         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8655     for (auto &M : CurTypes)
8656       setCorrectMemberOfFlag(M, MemberOfFlag);
8657   }
8658 
8659   /// Generate all the base pointers, section pointers, sizes, map types, and
8660   /// mappers for the extracted mappable expressions (all included in \a
8661   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8662   /// pair of the relevant declaration and index where it occurs is appended to
8663   /// the device pointers info array.
8664   void generateAllInfo(
8665       MapCombinedInfoTy &CombinedInfo,
8666       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8667           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8668     assert(CurDir.is<const OMPExecutableDirective *>() &&
8669            "Expect a executable directive");
8670     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8671     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8672   }
8673 
8674   /// Generate all the base pointers, section pointers, sizes, map types, and
8675   /// mappers for the extracted map clauses of user-defined mapper (all included
8676   /// in \a CombinedInfo).
8677   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8678     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8679            "Expect a declare mapper directive");
8680     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8681     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8682   }
8683 
8684   /// Emit capture info for lambdas for variables captured by reference.
8685   void generateInfoForLambdaCaptures(
8686       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8687       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8688     const auto *RD = VD->getType()
8689                          .getCanonicalType()
8690                          .getNonReferenceType()
8691                          ->getAsCXXRecordDecl();
8692     if (!RD || !RD->isLambda())
8693       return;
8694     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8695     LValue VDLVal = CGF.MakeAddrLValue(
8696         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8697     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8698     FieldDecl *ThisCapture = nullptr;
8699     RD->getCaptureFields(Captures, ThisCapture);
8700     if (ThisCapture) {
8701       LValue ThisLVal =
8702           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8703       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8704       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8705                                  VDLVal.getPointer(CGF));
8706       CombinedInfo.Exprs.push_back(VD);
8707       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8708       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8709       CombinedInfo.Sizes.push_back(
8710           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8711                                     CGF.Int64Ty, /*isSigned=*/true));
8712       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8713                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8714       CombinedInfo.Mappers.push_back(nullptr);
8715     }
8716     for (const LambdaCapture &LC : RD->captures()) {
8717       if (!LC.capturesVariable())
8718         continue;
8719       const VarDecl *VD = LC.getCapturedVar();
8720       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8721         continue;
8722       auto It = Captures.find(VD);
8723       assert(It != Captures.end() && "Found lambda capture without field.");
8724       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8725       if (LC.getCaptureKind() == LCK_ByRef) {
8726         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8727         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8728                                    VDLVal.getPointer(CGF));
8729         CombinedInfo.Exprs.push_back(VD);
8730         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8731         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8732         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8733             CGF.getTypeSize(
8734                 VD->getType().getCanonicalType().getNonReferenceType()),
8735             CGF.Int64Ty, /*isSigned=*/true));
8736       } else {
8737         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8738         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8739                                    VDLVal.getPointer(CGF));
8740         CombinedInfo.Exprs.push_back(VD);
8741         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8742         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8743         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8744       }
8745       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8746                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8747       CombinedInfo.Mappers.push_back(nullptr);
8748     }
8749   }
8750 
8751   /// Set correct indices for lambdas captures.
8752   void adjustMemberOfForLambdaCaptures(
8753       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8754       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8755       MapFlagsArrayTy &Types) const {
8756     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8757       // Set correct member_of idx for all implicit lambda captures.
8758       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8759                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8760         continue;
8761       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8762       assert(BasePtr && "Unable to find base lambda address.");
8763       int TgtIdx = -1;
8764       for (unsigned J = I; J > 0; --J) {
8765         unsigned Idx = J - 1;
8766         if (Pointers[Idx] != BasePtr)
8767           continue;
8768         TgtIdx = Idx;
8769         break;
8770       }
8771       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8772       // All other current entries will be MEMBER_OF the combined entry
8773       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8774       // 0xFFFF in the MEMBER_OF field).
8775       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8776       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8777     }
8778   }
8779 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap Capture being processed; it must not capture a variable array
  /// type (asserted). A capture of 'this' is represented by a null declaration.
  /// \param Arg Value passed for the capture; it is used directly as base
  /// pointer and pointer when the declaration is found in DevPointersMap.
  /// \param CombinedInfo Receives the generated entries.
  /// \param PartialStruct Forwarded to generateInfoForComponentList for every
  /// component list that is emitted.
  ///
  /// The handler must have been created for an executable directive (asserted).
  /// Outline:
  ///  1. Declarations found in DevPointersMap get a single by-value entry.
  ///  2. Otherwise the component lists that the directive's map clauses hold
  ///     for the declaration are collected and stably sorted.
  ///  3. Lists where one is a prefix of another are detected as overlapping
  ///     and the longer lists are attached to the shorter (base) one.
  ///  4. Base lists with overlaps are emitted first, then all remaining lists.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable; null when the capture is
    // 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // The entry is sized like a void pointer.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list: (components, map type, map modifiers,
    // is-implicit, mapper, variable reference expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI starts at the clause's first variable reference and is advanced
      // once per component list returned for VD.
      // NOTE(review): presumably assumes the variable references line up
      // one-to-one with the lists returned for VD - confirm.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable reordering of the collected lists. HasPresent is computed from the
    // modifiers of LHS and HasAllocs from the map type of RHS; HasPresentR is
    // computed from the modifiers of RHS and HasAllocsR from the map type of
    // LHS.
    // NOTE(review): the map-type operands are crossed relative to the modifier
    // operands; presumably the intent is to move 'present' and 'alloc' maps to
    // the front - confirm the crossing is intentional.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Keys are pointers to elements of DeclComponentLists, which is not
    // modified after this point.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare L against every list that follows it. The tie below overwrites
      // the scalar locals with the values of L1; only Components and
      // Components1 are used in this loop.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists in reverse order while the components agree on
        // statement class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It is the first unmatched component of the list that was not
          // exhausted; std::prev(It) is the component visited just before it.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  .getNonReferenceType()
                  ->isPointerType())
            continue;
          // The exhausted list is the base; the other list is recorded as one
          // of its overlapped sub-elements.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the fields used as a tie-breaker when two fields have
    // different parents; it is only filled when there is overlap.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels from the canonical type of VD until
      // getPointeeOrArrayElementType() returns the type itself.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      // C++ records use getPlainLayout; other records contribute their fields
      // in declaration order.
      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists (same walk as in the overlap
            // detection above).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The first differing components are both expected to be fields
            // (cast<> asserts otherwise). Fields of the same parent are ordered
            // by field index; otherwise the one found first in Layout wins.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // IsFirstComponentList is true only for the very first list handled by
    // either of the two loops below.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists that served as an overlap base were already emitted above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      // Cleared on every iteration, including those that emitted nothing.
      IsFirstComponentList = false;
    }
  }
9012 
9013   /// Generate the default map information for a given capture \a CI,
9014   /// record field declaration \a RI and captured value \a CV.
9015   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9016                               const FieldDecl &RI, llvm::Value *CV,
9017                               MapCombinedInfoTy &CombinedInfo) const {
9018     bool IsImplicit = true;
9019     // Do the default mapping.
9020     if (CI.capturesThis()) {
9021       CombinedInfo.Exprs.push_back(nullptr);
9022       CombinedInfo.BasePointers.push_back(CV);
9023       CombinedInfo.Pointers.push_back(CV);
9024       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9025       CombinedInfo.Sizes.push_back(
9026           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9027                                     CGF.Int64Ty, /*isSigned=*/true));
9028       // Default map type.
9029       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9030     } else if (CI.capturesVariableByCopy()) {
9031       const VarDecl *VD = CI.getCapturedVar();
9032       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9033       CombinedInfo.BasePointers.push_back(CV);
9034       CombinedInfo.Pointers.push_back(CV);
9035       if (!RI.getType()->isAnyPointerType()) {
9036         // We have to signal to the runtime captures passed by value that are
9037         // not pointers.
9038         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9039         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9040             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9041       } else {
9042         // Pointers are implicitly mapped with a zero size and no flags
9043         // (other than first map that is added for all implicit maps).
9044         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9045         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9046       }
9047       auto I = FirstPrivateDecls.find(VD);
9048       if (I != FirstPrivateDecls.end())
9049         IsImplicit = I->getSecond();
9050     } else {
9051       assert(CI.capturesVariable() && "Expected captured reference.");
9052       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9053       QualType ElementType = PtrTy->getPointeeType();
9054       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9055           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9056       // The default map type for a scalar/complex type is 'to' because by
9057       // default the value doesn't have to be retrieved. For an aggregate
9058       // type, the default is 'tofrom'.
9059       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9060       const VarDecl *VD = CI.getCapturedVar();
9061       auto I = FirstPrivateDecls.find(VD);
9062       if (I != FirstPrivateDecls.end() &&
9063           VD->getType().isConstant(CGF.getContext())) {
9064         llvm::Constant *Addr =
9065             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9066         // Copy the value of the original variable to the new global copy.
9067         CGF.Builder.CreateMemCpy(
9068             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9069             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9070             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9071         // Use new global variable as the base pointers.
9072         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9073         CombinedInfo.BasePointers.push_back(Addr);
9074         CombinedInfo.Pointers.push_back(Addr);
9075       } else {
9076         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9077         CombinedInfo.BasePointers.push_back(CV);
9078         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9079           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9080               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9081               AlignmentSource::Decl));
9082           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9083         } else {
9084           CombinedInfo.Pointers.push_back(CV);
9085         }
9086       }
9087       if (I != FirstPrivateDecls.end())
9088         IsImplicit = I->getSecond();
9089     }
9090     // Every default map produces a single argument which is a target parameter.
9091     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9092 
9093     // Add flag stating this is an implicit map.
9094     if (IsImplicit)
9095       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9096 
9097     // No user-defined mapper for default mapping.
9098     CombinedInfo.Mappers.push_back(nullptr);
9099   }
9100 };
9101 } // anonymous namespace
9102 
9103 static void emitNonContiguousDescriptor(
9104     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9105     CGOpenMPRuntime::TargetDataInfo &Info) {
9106   CodeGenModule &CGM = CGF.CGM;
9107   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9108       &NonContigInfo = CombinedInfo.NonContigInfo;
9109 
9110   // Build an array of struct descriptor_dim and then assign it to
9111   // offload_args.
9112   //
9113   // struct descriptor_dim {
9114   //  uint64_t offset;
9115   //  uint64_t count;
9116   //  uint64_t stride
9117   // };
9118   ASTContext &C = CGF.getContext();
9119   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9120   RecordDecl *RD;
9121   RD = C.buildImplicitRecord("descriptor_dim");
9122   RD->startDefinition();
9123   addFieldToRecordDecl(C, RD, Int64Ty);
9124   addFieldToRecordDecl(C, RD, Int64Ty);
9125   addFieldToRecordDecl(C, RD, Int64Ty);
9126   RD->completeDefinition();
9127   QualType DimTy = C.getRecordType(RD);
9128 
9129   enum { OffsetFD = 0, CountFD, StrideFD };
9130   // We need two index variable here since the size of "Dims" is the same as the
9131   // size of Components, however, the size of offset, count, and stride is equal
9132   // to the size of base declaration that is non-contiguous.
9133   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9134     // Skip emitting ir if dimension size is 1 since it cannot be
9135     // non-contiguous.
9136     if (NonContigInfo.Dims[I] == 1)
9137       continue;
9138     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9139     QualType ArrayTy =
9140         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9141     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9142     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9143       unsigned RevIdx = EE - II - 1;
9144       LValue DimsLVal = CGF.MakeAddrLValue(
9145           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9146       // Offset
9147       LValue OffsetLVal = CGF.EmitLValueForField(
9148           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9149       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9150       // Count
9151       LValue CountLVal = CGF.EmitLValueForField(
9152           DimsLVal, *std::next(RD->field_begin(), CountFD));
9153       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9154       // Stride
9155       LValue StrideLVal = CGF.EmitLValueForField(
9156           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9157       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9158     }
9159     // args[I] = &dims
9160     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9161         DimsAddr, CGM.Int8PtrTy);
9162     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9163         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9164         Info.PointersArray, 0, I);
9165     Address PAddr(P, CGF.getPointerAlign());
9166     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9167     ++L;
9168   }
9169 }
9170 
9171 /// Emit a string constant containing the names of the values mapped to the
9172 /// offloading runtime library.
9173 llvm::Constant *
9174 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9175                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9176   llvm::Constant *SrcLocStr;
9177   if (!MapExprs.getMapDecl()) {
9178     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9179   } else {
9180     std::string ExprName = "";
9181     if (MapExprs.getMapExpr()) {
9182       PrintingPolicy P(CGF.getContext().getLangOpts());
9183       llvm::raw_string_ostream OS(ExprName);
9184       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9185       OS.flush();
9186     } else {
9187       ExprName = MapExprs.getMapDecl()->getNameAsString();
9188     }
9189 
9190     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9191     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9192     const char *FileName = PLoc.getFilename();
9193     unsigned Line = PLoc.getLine();
9194     unsigned Column = PLoc.getColumn();
9195     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9196                                                 Line, Column);
9197   }
9198 
9199   return SrcLocStr;
9200 }
9201 
9202 /// Emit the arrays used to pass the captures and map information to the
9203 /// offloading runtime library. If there is no map or capture information,
9204 /// return nullptr by reference.
9205 static void emitOffloadingArrays(
9206     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9207     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9208     bool IsNonContiguous = false) {
9209   CodeGenModule &CGM = CGF.CGM;
9210   ASTContext &Ctx = CGF.getContext();
9211 
9212   // Reset the array information.
9213   Info.clearArrayInfo();
9214   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9215 
9216   if (Info.NumberOfPtrs) {
9217     // Detect if we have any capture size requiring runtime evaluation of the
9218     // size so that a constant array could be eventually used.
9219     bool hasRuntimeEvaluationCaptureSize = false;
9220     for (llvm::Value *S : CombinedInfo.Sizes)
9221       if (!isa<llvm::Constant>(S)) {
9222         hasRuntimeEvaluationCaptureSize = true;
9223         break;
9224       }
9225 
9226     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9227     QualType PointerArrayType = Ctx.getConstantArrayType(
9228         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9229         /*IndexTypeQuals=*/0);
9230 
9231     Info.BasePointersArray =
9232         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9233     Info.PointersArray =
9234         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9235     Address MappersArray =
9236         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9237     Info.MappersArray = MappersArray.getPointer();
9238 
9239     // If we don't have any VLA types or other types that require runtime
9240     // evaluation, we can use a constant array for the map sizes, otherwise we
9241     // need to fill up the arrays as we do for the pointers.
9242     QualType Int64Ty =
9243         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9244     if (hasRuntimeEvaluationCaptureSize) {
9245       QualType SizeArrayType = Ctx.getConstantArrayType(
9246           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9247           /*IndexTypeQuals=*/0);
9248       Info.SizesArray =
9249           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9250     } else {
9251       // We expect all the sizes to be constant, so we collect them to create
9252       // a constant array.
9253       SmallVector<llvm::Constant *, 16> ConstSizes;
9254       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9255         if (IsNonContiguous &&
9256             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9257           ConstSizes.push_back(llvm::ConstantInt::get(
9258               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9259         } else {
9260           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9261         }
9262       }
9263 
9264       auto *SizesArrayInit = llvm::ConstantArray::get(
9265           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9266       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9267       auto *SizesArrayGbl = new llvm::GlobalVariable(
9268           CGM.getModule(), SizesArrayInit->getType(),
9269           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9270           SizesArrayInit, Name);
9271       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9272       Info.SizesArray = SizesArrayGbl;
9273     }
9274 
9275     // The map types are always constant so we don't need to generate code to
9276     // fill arrays. Instead, we create an array constant.
9277     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9278     llvm::copy(CombinedInfo.Types, Mapping.begin());
9279     llvm::Constant *MapTypesArrayInit =
9280         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9281     std::string MaptypesName =
9282         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9283     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9284         CGM.getModule(), MapTypesArrayInit->getType(),
9285         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9286         MapTypesArrayInit, MaptypesName);
9287     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9288     Info.MapTypesArray = MapTypesArrayGbl;
9289 
9290     // The information types are only built if there is debug information
9291     // requested.
9292     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9293       Info.MapNamesArray = llvm::Constant::getNullValue(
9294           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9295     } else {
9296       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9297         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9298       };
9299       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9300       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9301 
9302       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9303           llvm::ArrayType::get(
9304               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9305               CombinedInfo.Exprs.size()),
9306           InfoMap);
9307       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9308           CGM.getModule(), MapNamesArrayInit->getType(),
9309           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9310           MapNamesArrayInit,
9311           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9312       Info.MapNamesArray = MapNamesArrayGbl;
9313     }
9314 
9315     // If there's a present map type modifier, it must not be applied to the end
9316     // of a region, so generate a separate map type array in that case.
9317     if (Info.separateBeginEndCalls()) {
9318       bool EndMapTypesDiffer = false;
9319       for (uint64_t &Type : Mapping) {
9320         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9321           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9322           EndMapTypesDiffer = true;
9323         }
9324       }
9325       if (EndMapTypesDiffer) {
9326         MapTypesArrayInit =
9327             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9328         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9329         MapTypesArrayGbl = new llvm::GlobalVariable(
9330             CGM.getModule(), MapTypesArrayInit->getType(),
9331             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9332             MapTypesArrayInit, MaptypesName);
9333         MapTypesArrayGbl->setUnnamedAddr(
9334             llvm::GlobalValue::UnnamedAddr::Global);
9335         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9336       }
9337     }
9338 
9339     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9340       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9341       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9342           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9343           Info.BasePointersArray, 0, I);
9344       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9345           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9346       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9347       CGF.Builder.CreateStore(BPVal, BPAddr);
9348 
9349       if (Info.requiresDevicePointerInfo())
9350         if (const ValueDecl *DevVD =
9351                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9352           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9353 
9354       llvm::Value *PVal = CombinedInfo.Pointers[I];
9355       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9356           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9357           Info.PointersArray, 0, I);
9358       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9359           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9360       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9361       CGF.Builder.CreateStore(PVal, PAddr);
9362 
9363       if (hasRuntimeEvaluationCaptureSize) {
9364         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9365             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9366             Info.SizesArray,
9367             /*Idx0=*/0,
9368             /*Idx1=*/I);
9369         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9370         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9371                                                           CGM.Int64Ty,
9372                                                           /*isSigned=*/true),
9373                                 SAddr);
9374       }
9375 
9376       // Fill up the mapper array.
9377       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9378       if (CombinedInfo.Mappers[I]) {
9379         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9380             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9381         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9382         Info.HasMapper = true;
9383       }
9384       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9385       CGF.Builder.CreateStore(MFunc, MAddr);
9386     }
9387   }
9388 
9389   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9390       Info.NumberOfPtrs == 0)
9391     return;
9392 
9393   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9394 }
9395 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When set, the arguments are being built for the runtime call that ends
  /// the region rather than the one that begins it.
  bool ForEndCall{false};

  /// Not marked explicit, so a plain bool converts to an options object.
  ArgumentsOptions(bool ForEndCall) : ForEndCall{ForEndCall} {}
  ArgumentsOptions() = default;
};
} // namespace
9404 
9405 /// Emit the arguments to be passed to the runtime library based on the
9406 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9407 /// ForEndCall, emit map types to be passed for the end of the region instead of
9408 /// the beginning.
9409 static void emitOffloadingArraysArgument(
9410     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9411     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9412     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9413     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9414     const ArgumentsOptions &Options = ArgumentsOptions()) {
9415   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9416          "expected region end call to runtime only when end call is separate");
9417   CodeGenModule &CGM = CGF.CGM;
9418   if (Info.NumberOfPtrs) {
9419     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9420         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9421         Info.BasePointersArray,
9422         /*Idx0=*/0, /*Idx1=*/0);
9423     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9424         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9425         Info.PointersArray,
9426         /*Idx0=*/0,
9427         /*Idx1=*/0);
9428     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9429         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9430         /*Idx0=*/0, /*Idx1=*/0);
9431     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9432         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9433         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9434                                                     : Info.MapTypesArray,
9435         /*Idx0=*/0,
9436         /*Idx1=*/0);
9437 
9438     // Only emit the mapper information arrays if debug information is
9439     // requested.
9440     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9441       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9442     else
9443       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9444           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9445           Info.MapNamesArray,
9446           /*Idx0=*/0,
9447           /*Idx1=*/0);
9448     // If there is no user-defined mapper, set the mapper array to nullptr to
9449     // avoid an unnecessary data privatization
9450     if (!Info.HasMapper)
9451       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9452     else
9453       MappersArrayArg =
9454           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9455   } else {
9456     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9457     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9458     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9459     MapTypesArrayArg =
9460         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9461     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9462     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9463   }
9464 }
9465 
9466 /// Check for inner distribute directive.
9467 static const OMPExecutableDirective *
9468 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9469   const auto *CS = D.getInnermostCapturedStmt();
9470   const auto *Body =
9471       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9472   const Stmt *ChildStmt =
9473       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9474 
9475   if (const auto *NestedDir =
9476           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9477     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9478     switch (D.getDirectiveKind()) {
9479     case OMPD_target:
9480       if (isOpenMPDistributeDirective(DKind))
9481         return NestedDir;
9482       if (DKind == OMPD_teams) {
9483         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9484             /*IgnoreCaptured=*/true);
9485         if (!Body)
9486           return nullptr;
9487         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9488         if (const auto *NND =
9489                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9490           DKind = NND->getDirectiveKind();
9491           if (isOpenMPDistributeDirective(DKind))
9492             return NND;
9493         }
9494       }
9495       return nullptr;
9496     case OMPD_target_teams:
9497       if (isOpenMPDistributeDirective(DKind))
9498         return NestedDir;
9499       return nullptr;
9500     case OMPD_target_parallel:
9501     case OMPD_target_simd:
9502     case OMPD_target_parallel_for:
9503     case OMPD_target_parallel_for_simd:
9504       return nullptr;
9505     case OMPD_target_teams_distribute:
9506     case OMPD_target_teams_distribute_simd:
9507     case OMPD_target_teams_distribute_parallel_for:
9508     case OMPD_target_teams_distribute_parallel_for_simd:
9509     case OMPD_parallel:
9510     case OMPD_for:
9511     case OMPD_parallel_for:
9512     case OMPD_parallel_master:
9513     case OMPD_parallel_sections:
9514     case OMPD_for_simd:
9515     case OMPD_parallel_for_simd:
9516     case OMPD_cancel:
9517     case OMPD_cancellation_point:
9518     case OMPD_ordered:
9519     case OMPD_threadprivate:
9520     case OMPD_allocate:
9521     case OMPD_task:
9522     case OMPD_simd:
9523     case OMPD_tile:
9524     case OMPD_sections:
9525     case OMPD_section:
9526     case OMPD_single:
9527     case OMPD_master:
9528     case OMPD_critical:
9529     case OMPD_taskyield:
9530     case OMPD_barrier:
9531     case OMPD_taskwait:
9532     case OMPD_taskgroup:
9533     case OMPD_atomic:
9534     case OMPD_flush:
9535     case OMPD_depobj:
9536     case OMPD_scan:
9537     case OMPD_teams:
9538     case OMPD_target_data:
9539     case OMPD_target_exit_data:
9540     case OMPD_target_enter_data:
9541     case OMPD_distribute:
9542     case OMPD_distribute_simd:
9543     case OMPD_distribute_parallel_for:
9544     case OMPD_distribute_parallel_for_simd:
9545     case OMPD_teams_distribute:
9546     case OMPD_teams_distribute_simd:
9547     case OMPD_teams_distribute_parallel_for:
9548     case OMPD_teams_distribute_parallel_for_simd:
9549     case OMPD_target_update:
9550     case OMPD_declare_simd:
9551     case OMPD_declare_variant:
9552     case OMPD_begin_declare_variant:
9553     case OMPD_end_declare_variant:
9554     case OMPD_declare_target:
9555     case OMPD_end_declare_target:
9556     case OMPD_declare_reduction:
9557     case OMPD_declare_mapper:
9558     case OMPD_taskloop:
9559     case OMPD_taskloop_simd:
9560     case OMPD_master_taskloop:
9561     case OMPD_master_taskloop_simd:
9562     case OMPD_parallel_master_taskloop:
9563     case OMPD_parallel_master_taskloop_simd:
9564     case OMPD_requires:
9565     case OMPD_unknown:
9566     default:
9567       llvm_unreachable("Unexpected directive.");
9568     }
9569   }
9570 
9571   return nullptr;
9572 }
9573 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// In the emitted code the incoming size argument is divided by the element
/// size; the quotient is the element count that the example above calls
/// \c size.
///
/// The function is emitted at most once per declaration: if \p D already has
/// an entry in UDMMap nothing is done, otherwise the new function is recorded
/// there.
///
/// \param D the 'declare mapper' declaration to emit the function for.
/// \param CGF if non-null, \p D is additionally appended to the FunctionUDMMap
/// entry keyed by \c CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Already emitted for this declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // Argument order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type, and the name of the mapper declaration.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop the optnone attribute from the mapper function, if present.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A separate CodeGenFunction is used for the mapper body; \p CGF is only
  // consulted at the very end of this function.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): SizeArg (and TypeArg below) are declared with Int64Ty, yet
  // the QualType passed to EmitLoadOfScalar is a pointer to Int64Ty; likewise
  // the void* arguments are loaded with a pointer-to-void* type. Confirm
  // whether this is intended.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // PtrBegin is the begin argument viewed as a pointer to the mapped type;
  // PtrEnd points Size elements past it.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is passed as the block to branch to when no initiation is needed.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range (begin == end) skips straight to the function exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block holding the branch above; it is the first incoming
  // edge of the loop PHI.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that will end with the loop back edge; it is
  // updated below for every component emitted.
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI holding the pointer to the array element currently being mapped.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count to the bit offset returned by
  // getFlagMemberOffset() so it can be added to each component's map type
  // below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  // This C++ loop runs at compile time: the code below is emitted once per
  // component collected in Info, all inside the body of the emitted loop.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name argument is a null pointer unless debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    // Neither TO nor FROM is set in the incoming map type: alloc.
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four alternatives. The edge from ToElseBB is the tofrom case
    // and carries the unmodified MemberMapType.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this array shadows the single-element OffloadingArgs declared
    // before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Back edge of the loop: LastBB is BodyBB when Info had no components,
  // otherwise the EndBB of the last component.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function so later requests for \p D reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Also remember \p D under the function currently being emitted by \p CGF.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9849 
9850 /// Emit the array initialization or deletion portion for user-defined mapper
9851 /// code generation. First, it evaluates whether an array section is mapped and
9852 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9853 /// true, and \a MapType indicates to not delete this array, array
9854 /// initialization code is generated. If \a IsInit is false, and \a MapType
9855 /// indicates to not this array, array deletion code is generated.
9856 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9857     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9858     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9859     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9860   StringRef Prefix = IsInit ? ".init" : ".del";
9861 
9862   // Evaluate if this is an array section.
9863   llvm::BasicBlock *BodyBB =
9864       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9865   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9866       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9867   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9868       MapType,
9869       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9870   llvm::Value *DeleteCond;
9871   llvm::Value *Cond;
9872   if (IsInit) {
9873     // base != begin?
9874     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9875         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9876     // IsPtrAndObj?
9877     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9878         MapType,
9879         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9880     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9881     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9882     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9883     DeleteCond = MapperCGF.Builder.CreateIsNull(
9884         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9885   } else {
9886     Cond = IsArray;
9887     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9888         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9889   }
9890   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9891   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9892 
9893   MapperCGF.EmitBlock(BodyBB);
9894   // Get the array size by multiplying element size and element number (i.e., \p
9895   // Size).
9896   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9897       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9898   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9899   // memory allocation/deletion purpose only.
9900   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9901       MapType,
9902       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9903                                    MappableExprsHandler::OMP_MAP_FROM |
9904                                    MappableExprsHandler::OMP_MAP_MEMBER_OF)));
9905   llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9906 
9907   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9908   // data structure.
9909   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9910                                    ArraySize, MapTypeArg, MapNameArg};
9911   MapperCGF.EmitRuntimeCall(
9912       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9913                                             OMPRTL___tgt_push_mapper_component),
9914       OffloadingArgs);
9915 }
9916 
9917 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9918     const OMPDeclareMapperDecl *D) {
9919   auto I = UDMMap.find(D);
9920   if (I != UDMMap.end())
9921     return I->second;
9922   emitUserDefinedMapper(D);
9923   return UDMMap.lookup(D);
9924 }
9925 
9926 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9927     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9928     llvm::Value *DeviceID,
9929     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9930                                      const OMPLoopDirective &D)>
9931         SizeEmitter) {
9932   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9933   const OMPExecutableDirective *TD = &D;
9934   // Get nested teams distribute kind directive, if any.
9935   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9936     TD = getNestedDistributeDirective(CGM.getContext(), D);
9937   if (!TD)
9938     return;
9939   const auto *LD = cast<OMPLoopDirective>(TD);
9940   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9941                                                          PrePostActionTy &) {
9942     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9943       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9944       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9945       CGF.EmitRuntimeCall(
9946           OMPBuilder.getOrCreateRuntimeFunction(
9947               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9948           Args);
9949     }
9950   };
9951   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9952 }
9953 
/// Emit the host-side code that launches the target region of directive \a D.
///
/// When \a OutlinedFnID is available, the offloading arrays are filled and one
/// of the __tgt_target*_mapper runtime entry points is called; if that call
/// returns a non-zero value the host version \a OutlinedFn is called instead.
/// When \a OutlinedFnID is null only the host version is called.
///
/// \param OutlinedFn   Host version of the target region (must be non-null).
/// \param OutlinedFnID Identifier of the target region passed to the runtime,
///                     or null when only host execution is emitted.
/// \param IfCond       Condition of the 'if' clause, or null. When present,
///                     it selects between the offloading path and the host
///                     path.
/// \param Device       Expression of the 'device' clause together with its
///                     modifier. With the 'ancestor' modifier the region is
///                     executed on the host.
/// \param SizeEmitter  Callback used to emit the loop trip count for
///                     loop-based target directives.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With 'depend' or 'nowait' clauses the region is emitted through
  // EmitOMPTargetTaskBasedDirective (see below) instead of inline.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  // Values of the variables captured by the 'target' region; they are used
  // both for building the map information and as arguments of the host call.
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by TargetThenGen below and consumed by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // Regenerate the captured values in the CodeGenFunction that is
        // active here before calling the host version.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: pass the "undefined device" value.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // 'nowait' selects the *_nowait_mapper variants of the runtime entry
    // points.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No enclosed teams region: same arguments without the team and thread
      // counts.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value of the runtime call is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: build the map information for every capture, emit the
  // offloading arrays, publish them through InputInfo/MapTypesArray/
  // MapNamesArray and finally run ThenGen (inline or task-based).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a null entry stands for 'this'.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays to the state shared with ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, task-based when an outer task is required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      // The host path has no offloading arrays, so a default-constructed
      // info object is passed.
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10263 
10264 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10265                                                     StringRef ParentName) {
10266   if (!S)
10267     return;
10268 
10269   // Codegen OMP target directives that offload compute to the device.
10270   bool RequiresDeviceCodegen =
10271       isa<OMPExecutableDirective>(S) &&
10272       isOpenMPTargetExecutionDirective(
10273           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10274 
10275   if (RequiresDeviceCodegen) {
10276     const auto &E = *cast<OMPExecutableDirective>(S);
10277     unsigned DeviceID;
10278     unsigned FileID;
10279     unsigned Line;
10280     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10281                              FileID, Line);
10282 
10283     // Is this a target region that should not be emitted as an entry point? If
10284     // so just signal we are done with this target region.
10285     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10286                                                             ParentName, Line))
10287       return;
10288 
10289     switch (E.getDirectiveKind()) {
10290     case OMPD_target:
10291       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10292                                                    cast<OMPTargetDirective>(E));
10293       break;
10294     case OMPD_target_parallel:
10295       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10296           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10297       break;
10298     case OMPD_target_teams:
10299       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10300           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10301       break;
10302     case OMPD_target_teams_distribute:
10303       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10304           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10305       break;
10306     case OMPD_target_teams_distribute_simd:
10307       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10308           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10309       break;
10310     case OMPD_target_parallel_for:
10311       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10312           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10313       break;
10314     case OMPD_target_parallel_for_simd:
10315       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10316           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10317       break;
10318     case OMPD_target_simd:
10319       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10320           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10321       break;
10322     case OMPD_target_teams_distribute_parallel_for:
10323       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10324           CGM, ParentName,
10325           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10326       break;
10327     case OMPD_target_teams_distribute_parallel_for_simd:
10328       CodeGenFunction::
10329           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10330               CGM, ParentName,
10331               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10332       break;
10333     case OMPD_parallel:
10334     case OMPD_for:
10335     case OMPD_parallel_for:
10336     case OMPD_parallel_master:
10337     case OMPD_parallel_sections:
10338     case OMPD_for_simd:
10339     case OMPD_parallel_for_simd:
10340     case OMPD_cancel:
10341     case OMPD_cancellation_point:
10342     case OMPD_ordered:
10343     case OMPD_threadprivate:
10344     case OMPD_allocate:
10345     case OMPD_task:
10346     case OMPD_simd:
10347     case OMPD_tile:
10348     case OMPD_sections:
10349     case OMPD_section:
10350     case OMPD_single:
10351     case OMPD_master:
10352     case OMPD_critical:
10353     case OMPD_taskyield:
10354     case OMPD_barrier:
10355     case OMPD_taskwait:
10356     case OMPD_taskgroup:
10357     case OMPD_atomic:
10358     case OMPD_flush:
10359     case OMPD_depobj:
10360     case OMPD_scan:
10361     case OMPD_teams:
10362     case OMPD_target_data:
10363     case OMPD_target_exit_data:
10364     case OMPD_target_enter_data:
10365     case OMPD_distribute:
10366     case OMPD_distribute_simd:
10367     case OMPD_distribute_parallel_for:
10368     case OMPD_distribute_parallel_for_simd:
10369     case OMPD_teams_distribute:
10370     case OMPD_teams_distribute_simd:
10371     case OMPD_teams_distribute_parallel_for:
10372     case OMPD_teams_distribute_parallel_for_simd:
10373     case OMPD_target_update:
10374     case OMPD_declare_simd:
10375     case OMPD_declare_variant:
10376     case OMPD_begin_declare_variant:
10377     case OMPD_end_declare_variant:
10378     case OMPD_declare_target:
10379     case OMPD_end_declare_target:
10380     case OMPD_declare_reduction:
10381     case OMPD_declare_mapper:
10382     case OMPD_taskloop:
10383     case OMPD_taskloop_simd:
10384     case OMPD_master_taskloop:
10385     case OMPD_master_taskloop_simd:
10386     case OMPD_parallel_master_taskloop:
10387     case OMPD_parallel_master_taskloop_simd:
10388     case OMPD_requires:
10389     case OMPD_unknown:
10390     default:
10391       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10392     }
10393     return;
10394   }
10395 
10396   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10397     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10398       return;
10399 
10400     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10401     return;
10402   }
10403 
10404   // If this is a lambda function, look into its body.
10405   if (const auto *L = dyn_cast<LambdaExpr>(S))
10406     S = L->getBody();
10407 
10408   // Keep looking for target regions recursively.
10409   for (const Stmt *II : S->children())
10410     scanForTargetRegionsFunctions(II, ParentName);
10411 }
10412 
10413 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10414   // If emitting code for the host, we do not process FD here. Instead we do
10415   // the normal code generation.
10416   if (!CGM.getLangOpts().OpenMPIsDevice) {
10417     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10418       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10419           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10420       // Do not emit device_type(nohost) functions for the host.
10421       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10422         return true;
10423     }
10424     return false;
10425   }
10426 
10427   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10428   // Try to detect target regions in the function.
10429   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10430     StringRef Name = CGM.getMangledName(GD);
10431     scanForTargetRegionsFunctions(FD->getBody(), Name);
10432     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10433         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10434     // Do not emit device_type(nohost) functions for the host.
10435     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10436       return true;
10437   }
10438 
10439   // Do not to emit function if it is not marked as declare target.
10440   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10441          AlreadyEmittedTargetDecls.count(VD) == 0;
10442 }
10443 
10444 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10445   if (!CGM.getLangOpts().OpenMPIsDevice)
10446     return false;
10447 
10448   // Check if there are Ctors/Dtors in this declaration and look for target
10449   // regions in it. We use the complete variant to produce the kernel name
10450   // mangling.
10451   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10452   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10453     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10454       StringRef ParentName =
10455           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10456       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10457     }
10458     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10459       StringRef ParentName =
10460           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10461       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10462     }
10463   }
10464 
10465   // Do not to emit variable if it is not marked as declare target.
10466   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10467       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10468           cast<VarDecl>(GD.getDecl()));
10469   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10470       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10471        HasRequiresUnifiedSharedMemory)) {
10472     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10473     return true;
10474   }
10475   return false;
10476 }
10477 
10478 llvm::Constant *
10479 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10480                                                 const VarDecl *VD) {
10481   assert(VD->getType().isConstant(CGM.getContext()) &&
10482          "Expected constant variable.");
10483   StringRef VarName;
10484   llvm::Constant *Addr;
10485   llvm::GlobalValue::LinkageTypes Linkage;
10486   QualType Ty = VD->getType();
10487   SmallString<128> Buffer;
10488   {
10489     unsigned DeviceID;
10490     unsigned FileID;
10491     unsigned Line;
10492     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10493                              FileID, Line);
10494     llvm::raw_svector_ostream OS(Buffer);
10495     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10496        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10497     VarName = OS.str();
10498   }
10499   Linkage = llvm::GlobalValue::InternalLinkage;
10500   Addr =
10501       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10502                                   getDefaultFirstprivateAddressSpace());
10503   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10504   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10505   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10506   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10507       VarName, Addr, VarSize,
10508       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10509   return Addr;
10510 }
10511 
10512 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10513                                                    llvm::Constant *Addr) {
10514   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10515       !CGM.getLangOpts().OpenMPIsDevice)
10516     return;
10517   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10518       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10519   if (!Res) {
10520     if (CGM.getLangOpts().OpenMPIsDevice) {
10521       // Register non-target variables being emitted in device code (debug info
10522       // may cause this).
10523       StringRef VarName = CGM.getMangledName(VD);
10524       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10525     }
10526     return;
10527   }
10528   // Register declare target variables.
10529   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10530   StringRef VarName;
10531   CharUnits VarSize;
10532   llvm::GlobalValue::LinkageTypes Linkage;
10533 
10534   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10535       !HasRequiresUnifiedSharedMemory) {
10536     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10537     VarName = CGM.getMangledName(VD);
10538     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10539       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10540       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10541     } else {
10542       VarSize = CharUnits::Zero();
10543     }
10544     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10545     // Temp solution to prevent optimizations of the internal variables.
10546     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10547       std::string RefName = getName({VarName, "ref"});
10548       if (!CGM.GetGlobalValue(RefName)) {
10549         llvm::Constant *AddrRef =
10550             getOrCreateInternalVariable(Addr->getType(), RefName);
10551         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10552         GVAddrRef->setConstant(/*Val=*/true);
10553         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10554         GVAddrRef->setInitializer(Addr);
10555         CGM.addCompilerUsedGlobal(GVAddrRef);
10556       }
10557     }
10558   } else {
10559     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10560             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10561              HasRequiresUnifiedSharedMemory)) &&
10562            "Declare target attribute must link or to with unified memory.");
10563     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10564       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10565     else
10566       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10567 
10568     if (CGM.getLangOpts().OpenMPIsDevice) {
10569       VarName = Addr->getName();
10570       Addr = nullptr;
10571     } else {
10572       VarName = getAddrOfDeclareTargetVar(VD).getName();
10573       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10574     }
10575     VarSize = CGM.getPointerSize();
10576     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10577   }
10578 
10579   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10580       VarName, Addr, VarSize, Flags, Linkage);
10581 }
10582 
10583 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10584   if (isa<FunctionDecl>(GD.getDecl()) ||
10585       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10586     return emitTargetFunctions(GD);
10587 
10588   return emitTargetGlobalVariable(GD);
10589 }
10590 
10591 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10592   for (const VarDecl *VD : DeferredGlobalVariables) {
10593     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10594         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10595     if (!Res)
10596       continue;
10597     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10598         !HasRequiresUnifiedSharedMemory) {
10599       CGM.EmitGlobal(VD);
10600     } else {
10601       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10602               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10603                HasRequiresUnifiedSharedMemory)) &&
10604              "Expected link clause or to clause with unified memory.");
10605       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10606     }
10607   }
10608 }
10609 
/// Hook for adjusting target-specific data of lambdas used in the target
/// directive \p D. This implementation emits nothing: it only checks (in
/// asserts-enabled builds) that \p D is a target execution directive. \p CGF
/// is not used here.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10615 
10616 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10617   for (const OMPClause *Clause : D->clauselists()) {
10618     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10619       HasRequiresUnifiedSharedMemory = true;
10620     } else if (const auto *AC =
10621                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10622       switch (AC->getAtomicDefaultMemOrderKind()) {
10623       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10624         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10625         break;
10626       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10627         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10628         break;
10629       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10630         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10631         break;
10632       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10633         break;
10634       }
10635     }
10636   }
10637 }
10638 
/// Return the atomic ordering stored in RequiresAtomicOrdering, i.e. the one
/// selected by an atomic_default_mem_order clause seen by
/// processRequiresDirective (or its initial value if none was processed).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10642 
10643 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10644                                                        LangAS &AS) {
10645   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10646     return false;
10647   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10648   switch(A->getAllocatorType()) {
10649   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10650   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10651   // Not supported, fallback to the default mem space.
10652   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10653   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10654   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10655   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10656   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10657   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10658   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10659     AS = LangAS::Default;
10660     return true;
10661   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10662     llvm_unreachable("Expected predefined allocator for the variables with the "
10663                      "static storage.");
10664   }
10665   return false;
10666 }
10667 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it sees a unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10671 
10672 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10673     CodeGenModule &CGM)
10674     : CGM(CGM) {
10675   if (CGM.getLangOpts().OpenMPIsDevice) {
10676     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10677     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10678   }
10679 }
10680 
10681 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10682   if (CGM.getLangOpts().OpenMPIsDevice)
10683     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10684 }
10685 
10686 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10687   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10688     return true;
10689 
10690   const auto *D = cast<FunctionDecl>(GD.getDecl());
10691   // Do not to emit function if it is marked as declare target as it was already
10692   // emitted.
10693   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10694     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10695       if (auto *F = dyn_cast_or_null<llvm::Function>(
10696               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10697         return !F->isDeclaration();
10698       return false;
10699     }
10700     return true;
10701   }
10702 
10703   return !AlreadyEmittedTargetDecls.insert(D).second;
10704 }
10705 
10706 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10707   // If we don't have entries or if we are emitting code for the device, we
10708   // don't need to do anything.
10709   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10710       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10711       (OffloadEntriesInfoManager.empty() &&
10712        !HasEmittedDeclareTargetRegion &&
10713        !HasEmittedTargetRegion))
10714     return nullptr;
10715 
10716   // Create and register the function that handles the requires directives.
10717   ASTContext &C = CGM.getContext();
10718 
10719   llvm::Function *RequiresRegFn;
10720   {
10721     CodeGenFunction CGF(CGM);
10722     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10723     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10724     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10725     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10726     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10727     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10728     // TODO: check for other requires clauses.
10729     // The requires directive takes effect only when a target region is
10730     // present in the compilation unit. Otherwise it is ignored and not
10731     // passed to the runtime. This avoids the runtime from throwing an error
10732     // for mismatching requires clauses across compilation units that don't
10733     // contain at least 1 target region.
10734     assert((HasEmittedTargetRegion ||
10735             HasEmittedDeclareTargetRegion ||
10736             !OffloadEntriesInfoManager.empty()) &&
10737            "Target or declare target region expected.");
10738     if (HasRequiresUnifiedSharedMemory)
10739       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10740     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10741                             CGM.getModule(), OMPRTL___tgt_register_requires),
10742                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10743     CGF.FinishFunction();
10744   }
10745   return RequiresRegFn;
10746 }
10747 
10748 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10749                                     const OMPExecutableDirective &D,
10750                                     SourceLocation Loc,
10751                                     llvm::Function *OutlinedFn,
10752                                     ArrayRef<llvm::Value *> CapturedVars) {
10753   if (!CGF.HaveInsertPoint())
10754     return;
10755 
10756   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10757   CodeGenFunction::RunCleanupsScope Scope(CGF);
10758 
10759   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10760   llvm::Value *Args[] = {
10761       RTLoc,
10762       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10763       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10764   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10765   RealArgs.append(std::begin(Args), std::end(Args));
10766   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10767 
10768   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10769       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10770   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10771 }
10772 
10773 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10774                                          const Expr *NumTeams,
10775                                          const Expr *ThreadLimit,
10776                                          SourceLocation Loc) {
10777   if (!CGF.HaveInsertPoint())
10778     return;
10779 
10780   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10781 
10782   llvm::Value *NumTeamsVal =
10783       NumTeams
10784           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10785                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10786           : CGF.Builder.getInt32(0);
10787 
10788   llvm::Value *ThreadLimitVal =
10789       ThreadLimit
10790           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10791                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10792           : CGF.Builder.getInt32(0);
10793 
10794   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10795   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10796                                      ThreadLimitVal};
10797   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10798                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10799                       PushNumTeamsArgs);
10800 }
10801 
/// Emit the code for a 'target data' region: a call to
/// __tgt_target_data_begin_mapper, the region body produced by \p CodeGen,
/// and a matching call to __tgt_target_data_end_mapper.
///
/// \param CGF     Function being generated; nothing is emitted without an
///                insert point.
/// \param D       Directive whose map information is used to build the
///                offloading arrays.
/// \param IfCond  Optional 'if' clause condition. When present, the begin and
///                end runtime calls are each guarded by it via emitIfClause.
/// \param Device  Optional 'device' clause expression. When null,
///                OMP_DEVICEID_UNDEF is passed as the device id.
/// \param CodeGen Generator for the region body.
/// \param Info    Filled by emitOffloadingArrays in the opening part and
///                reused when emitting the closing call.
///
/// The body is emitted once, between the two runtime calls and with
/// NoPrivAction installed, when Info.CaptureDeviceAddrMap is empty. Otherwise
/// it is emitted right after the begin call and, if there is an 'if' clause,
/// a second time (with NoPrivAction) in the else branch.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; otherwise pass the "undefined device" id.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Re-derive the array arguments from Info, this time for the end call.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any; otherwise pass the "undefined device" id.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the data environment, conditional on the 'if' clause if any.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the data environment, conditional on the 'if' clause if any.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10955 
/// Emit the runtime call for a standalone data directive: 'target enter
/// data', 'target exit data' or 'target update'.
///
/// \param CGF    Function being generated; nothing is emitted without an
///               insert point.
/// \param D      The standalone directive; any other directive kind is
///               rejected by the assert below and by the switch.
/// \param IfCond Optional 'if' clause condition. When present the whole
///               emission is guarded by it and the else branch is empty.
/// \param Device Optional 'device' clause expression. When null,
///               OMP_DEVICEID_UNDEF is passed as the device id.
///
/// The begin/end/update "_mapper" runtime entry point is chosen from the
/// directive kind, using the "_nowait" variant when a nowait clause is
/// present. When the directive has a depend or nowait clause the call is
/// emitted through EmitOMPTargetTaskBasedDirective, otherwise inline.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills it in,
  // ThenGen reads it when building the runtime call arguments.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise pass the "undefined device" id.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is not a standalone data directive and must
    // never reach this point.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo /
  // MapTypesArray / MapNamesArray, and then runs ThenGen either inside a
  // task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A depend or nowait clause means the call is wrapped in a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause nothing at all is emitted when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11133 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// In the x86 vector-variant name built by emitX86DeclareSimdFunction these
  /// kinds are encoded as 's' (LinearWithVarStride), 'l' (Linear),
  /// 'u' (Uniform) and 'v' (Vector).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// Value printed after the kind letter for the linear kinds: always for
    /// LinearWithVarStride, and only when different from 1 for Linear.
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; encoded as 'a<value>' when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11144 
11145 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11146                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11147   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11148   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11149   // of that clause. The VLEN value must be power of 2.
11150   // In other case the notion of the function`s "characteristic data type" (CDT)
11151   // is used to compute the vector length.
11152   // CDT is defined in the following order:
11153   //   a) For non-void function, the CDT is the return type.
11154   //   b) If the function has any non-uniform, non-linear parameters, then the
11155   //   CDT is the type of the first such parameter.
11156   //   c) If the CDT determined by a) or b) above is struct, union, or class
11157   //   type which is pass-by-value (except for the type that maps to the
11158   //   built-in complex data type), the characteristic data type is int.
11159   //   d) If none of the above three cases is applicable, the CDT is int.
11160   // The VLEN is then determined based on the CDT and the size of vector
11161   // register of that ISA for which current vector version is generated. The
11162   // VLEN is computed using the formula below:
11163   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11164   // where vector register size specified in section 3.2.1 Registers and the
11165   // Stack Frame of original AMD64 ABI document.
11166   QualType RetType = FD->getReturnType();
11167   if (RetType.isNull())
11168     return 0;
11169   ASTContext &C = FD->getASTContext();
11170   QualType CDT;
11171   if (!RetType.isNull() && !RetType->isVoidType()) {
11172     CDT = RetType;
11173   } else {
11174     unsigned Offset = 0;
11175     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11176       if (ParamAttrs[Offset].Kind == Vector)
11177         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11178       ++Offset;
11179     }
11180     if (CDT.isNull()) {
11181       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11182         if (ParamAttrs[I + Offset].Kind == Vector) {
11183           CDT = FD->getParamDecl(I)->getType();
11184           break;
11185         }
11186       }
11187     }
11188   }
11189   if (CDT.isNull())
11190     CDT = C.IntTy;
11191   CDT = CDT->getCanonicalTypeUnqualified();
11192   if (CDT->isRecordType() || CDT->isUnionType())
11193     CDT = C.IntTy;
11194   return C.getTypeSize(CDT);
11195 }
11196 
11197 static void
11198 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11199                            const llvm::APSInt &VLENVal,
11200                            ArrayRef<ParamAttrTy> ParamAttrs,
11201                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11202   struct ISADataTy {
11203     char ISA;
11204     unsigned VecRegSize;
11205   };
11206   ISADataTy ISAData[] = {
11207       {
11208           'b', 128
11209       }, // SSE
11210       {
11211           'c', 256
11212       }, // AVX
11213       {
11214           'd', 256
11215       }, // AVX2
11216       {
11217           'e', 512
11218       }, // AVX512
11219   };
11220   llvm::SmallVector<char, 2> Masked;
11221   switch (State) {
11222   case OMPDeclareSimdDeclAttr::BS_Undefined:
11223     Masked.push_back('N');
11224     Masked.push_back('M');
11225     break;
11226   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11227     Masked.push_back('N');
11228     break;
11229   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11230     Masked.push_back('M');
11231     break;
11232   }
11233   for (char Mask : Masked) {
11234     for (const ISADataTy &Data : ISAData) {
11235       SmallString<256> Buffer;
11236       llvm::raw_svector_ostream Out(Buffer);
11237       Out << "_ZGV" << Data.ISA << Mask;
11238       if (!VLENVal) {
11239         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11240         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11241         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11242       } else {
11243         Out << VLENVal;
11244       }
11245       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11246         switch (ParamAttr.Kind){
11247         case LinearWithVarStride:
11248           Out << 's' << ParamAttr.StrideOrArg;
11249           break;
11250         case Linear:
11251           Out << 'l';
11252           if (ParamAttr.StrideOrArg != 1)
11253             Out << ParamAttr.StrideOrArg;
11254           break;
11255         case Uniform:
11256           Out << 'u';
11257           break;
11258         case Vector:
11259           Out << 'v';
11260           break;
11261         }
11262         if (!!ParamAttr.Alignment)
11263           Out << 'a' << ParamAttr.Alignment;
11264       }
11265       Out << '_' << Fn->getName();
11266       Fn->addFnAttr(Out.str());
11267     }
11268   }
11269 }
11270 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11276 
11277 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11278 ///
11279 /// TODO: Need to implement the behavior for reference marked with a
11280 /// var or no linear modifiers (1.b in the section). For this, we
11281 /// need to extend ParamKindTy to support the linear modifiers.
11282 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11283   QT = QT.getCanonicalType();
11284 
11285   if (QT->isVoidType())
11286     return false;
11287 
11288   if (Kind == ParamKindTy::Uniform)
11289     return false;
11290 
11291   if (Kind == ParamKindTy::Linear)
11292     return false;
11293 
11294   // TODO: Handle linear references with modifiers
11295 
11296   if (Kind == ParamKindTy::LinearWithVarStride)
11297     return false;
11298 
11299   return true;
11300 }
11301 
11302 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11303 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11304   QT = QT.getCanonicalType();
11305   unsigned Size = C.getTypeSize(QT);
11306 
11307   // Only scalars and complex within 16 bytes wide set PVB to true.
11308   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11309     return false;
11310 
11311   if (QT->isFloatingType())
11312     return true;
11313 
11314   if (QT->isIntegerType())
11315     return true;
11316 
11317   if (QT->isPointerType())
11318     return true;
11319 
11320   // TODO: Add support for complex types (section 3.1.2, item 2).
11321 
11322   return false;
11323 }
11324 
11325 /// Computes the lane size (LS) of a return type or of an input parameter,
11326 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11327 /// TODO: Add support for references, section 3.2.1, item 1.
11328 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11329   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11330     QualType PTy = QT.getCanonicalType()->getPointeeType();
11331     if (getAArch64PBV(PTy, C))
11332       return C.getTypeSize(PTy);
11333   }
11334   if (getAArch64PBV(QT, C))
11335     return C.getTypeSize(QT);
11336 
11337   return C.getTypeSize(C.getUIntPtrType());
11338 }
11339 
11340 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11341 // signature of the scalar function, as defined in 3.2.2 of the
11342 // AAVFABI.
11343 static std::tuple<unsigned, unsigned, bool>
11344 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11345   QualType RetType = FD->getReturnType().getCanonicalType();
11346 
11347   ASTContext &C = FD->getASTContext();
11348 
11349   bool OutputBecomesInput = false;
11350 
11351   llvm::SmallVector<unsigned, 8> Sizes;
11352   if (!RetType->isVoidType()) {
11353     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11354     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11355       OutputBecomesInput = true;
11356   }
11357   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11358     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11359     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11360   }
11361 
11362   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11363   // The LS of a function parameter / return value can only be a power
11364   // of 2, starting from 8 bits, up to 128.
11365   assert(std::all_of(Sizes.begin(), Sizes.end(),
11366                      [](unsigned Size) {
11367                        return Size == 8 || Size == 16 || Size == 32 ||
11368                               Size == 64 || Size == 128;
11369                      }) &&
11370          "Invalid size");
11371 
11372   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11373                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11374                          OutputBecomesInput);
11375 }
11376 
11377 /// Mangle the parameter part of the vector function name according to
11378 /// their OpenMP classification. The mangling function is defined in
11379 /// section 3.5 of the AAVFABI.
11380 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11381   SmallString<256> Buffer;
11382   llvm::raw_svector_ostream Out(Buffer);
11383   for (const auto &ParamAttr : ParamAttrs) {
11384     switch (ParamAttr.Kind) {
11385     case LinearWithVarStride:
11386       Out << "ls" << ParamAttr.StrideOrArg;
11387       break;
11388     case Linear:
11389       Out << 'l';
11390       // Don't print the step value if it is not present or if it is
11391       // equal to 1.
11392       if (ParamAttr.StrideOrArg != 1)
11393         Out << ParamAttr.StrideOrArg;
11394       break;
11395     case Uniform:
11396       Out << 'u';
11397       break;
11398     case Vector:
11399       Out << 'v';
11400       break;
11401     }
11402 
11403     if (!!ParamAttr.Alignment)
11404       Out << 'a' << ParamAttr.Alignment;
11405   }
11406 
11407   return std::string(Out.str());
11408 }
11409 
11410 // Function used to add the attribute. The parameter `VLEN` is
11411 // templated to allow the use of "x" when targeting scalable functions
11412 // for SVE.
11413 template <typename T>
11414 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11415                                  char ISA, StringRef ParSeq,
11416                                  StringRef MangledName, bool OutputBecomesInput,
11417                                  llvm::Function *Fn) {
11418   SmallString<256> Buffer;
11419   llvm::raw_svector_ostream Out(Buffer);
11420   Out << Prefix << ISA << LMask << VLEN;
11421   if (OutputBecomesInput)
11422     Out << "v";
11423   Out << ParSeq << "_" << MangledName;
11424   Fn->addFnAttr(Out.str());
11425 }
11426 
11427 // Helper function to generate the Advanced SIMD names depending on
11428 // the value of the NDS when simdlen is not present.
11429 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11430                                       StringRef Prefix, char ISA,
11431                                       StringRef ParSeq, StringRef MangledName,
11432                                       bool OutputBecomesInput,
11433                                       llvm::Function *Fn) {
11434   switch (NDS) {
11435   case 8:
11436     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11437                          OutputBecomesInput, Fn);
11438     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11439                          OutputBecomesInput, Fn);
11440     break;
11441   case 16:
11442     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11443                          OutputBecomesInput, Fn);
11444     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11445                          OutputBecomesInput, Fn);
11446     break;
11447   case 32:
11448     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11449                          OutputBecomesInput, Fn);
11450     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11451                          OutputBecomesInput, Fn);
11452     break;
11453   case 64:
11454   case 128:
11455     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11456                          OutputBecomesInput, Fn);
11457     break;
11458   default:
11459     llvm_unreachable("Scalar type is too wide.");
11460   }
11461 }
11462 
11463 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11464 static void emitAArch64DeclareSimdFunction(
11465     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11466     ArrayRef<ParamAttrTy> ParamAttrs,
11467     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11468     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11469 
11470   // Get basic data for building the vector signature.
11471   const auto Data = getNDSWDS(FD, ParamAttrs);
11472   const unsigned NDS = std::get<0>(Data);
11473   const unsigned WDS = std::get<1>(Data);
11474   const bool OutputBecomesInput = std::get<2>(Data);
11475 
11476   // Check the values provided via `simdlen` by the user.
11477   // 1. A `simdlen(1)` doesn't produce vector signatures,
11478   if (UserVLEN == 1) {
11479     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11480         DiagnosticsEngine::Warning,
11481         "The clause simdlen(1) has no effect when targeting aarch64.");
11482     CGM.getDiags().Report(SLoc, DiagID);
11483     return;
11484   }
11485 
11486   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11487   // Advanced SIMD output.
11488   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11489     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11490         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11491                                     "power of 2 when targeting Advanced SIMD.");
11492     CGM.getDiags().Report(SLoc, DiagID);
11493     return;
11494   }
11495 
11496   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11497   // limits.
11498   if (ISA == 's' && UserVLEN != 0) {
11499     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11500       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11501           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11502                                       "lanes in the architectural constraints "
11503                                       "for SVE (min is 128-bit, max is "
11504                                       "2048-bit, by steps of 128-bit)");
11505       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11506       return;
11507     }
11508   }
11509 
11510   // Sort out parameter sequence.
11511   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11512   StringRef Prefix = "_ZGV";
11513   // Generate simdlen from user input (if any).
11514   if (UserVLEN) {
11515     if (ISA == 's') {
11516       // SVE generates only a masked function.
11517       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11518                            OutputBecomesInput, Fn);
11519     } else {
11520       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11521       // Advanced SIMD generates one or two functions, depending on
11522       // the `[not]inbranch` clause.
11523       switch (State) {
11524       case OMPDeclareSimdDeclAttr::BS_Undefined:
11525         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11526                              OutputBecomesInput, Fn);
11527         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11528                              OutputBecomesInput, Fn);
11529         break;
11530       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11531         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11532                              OutputBecomesInput, Fn);
11533         break;
11534       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11535         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11536                              OutputBecomesInput, Fn);
11537         break;
11538       }
11539     }
11540   } else {
11541     // If no user simdlen is provided, follow the AAVFABI rules for
11542     // generating the vector length.
11543     if (ISA == 's') {
11544       // SVE, section 3.4.1, item 1.
11545       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11546                            OutputBecomesInput, Fn);
11547     } else {
11548       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11549       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11550       // two vector names depending on the use of the clause
11551       // `[not]inbranch`.
11552       switch (State) {
11553       case OMPDeclareSimdDeclAttr::BS_Undefined:
11554         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11555                                   OutputBecomesInput, Fn);
11556         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11557                                   OutputBecomesInput, Fn);
11558         break;
11559       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11560         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11561                                   OutputBecomesInput, Fn);
11562         break;
11563       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11564         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11565                                   OutputBecomesInput, Fn);
11566         break;
11567       }
11568     }
11569   }
11570 }
11571 
11572 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11573                                               llvm::Function *Fn) {
11574   ASTContext &C = CGM.getContext();
11575   FD = FD->getMostRecentDecl();
11576   // Map params to their positions in function decl.
11577   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11578   if (isa<CXXMethodDecl>(FD))
11579     ParamPositions.try_emplace(FD, 0);
11580   unsigned ParamPos = ParamPositions.size();
11581   for (const ParmVarDecl *P : FD->parameters()) {
11582     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11583     ++ParamPos;
11584   }
11585   while (FD) {
11586     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11587       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11588       // Mark uniform parameters.
11589       for (const Expr *E : Attr->uniforms()) {
11590         E = E->IgnoreParenImpCasts();
11591         unsigned Pos;
11592         if (isa<CXXThisExpr>(E)) {
11593           Pos = ParamPositions[FD];
11594         } else {
11595           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11596                                 ->getCanonicalDecl();
11597           Pos = ParamPositions[PVD];
11598         }
11599         ParamAttrs[Pos].Kind = Uniform;
11600       }
11601       // Get alignment info.
11602       auto NI = Attr->alignments_begin();
11603       for (const Expr *E : Attr->aligneds()) {
11604         E = E->IgnoreParenImpCasts();
11605         unsigned Pos;
11606         QualType ParmTy;
11607         if (isa<CXXThisExpr>(E)) {
11608           Pos = ParamPositions[FD];
11609           ParmTy = E->getType();
11610         } else {
11611           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11612                                 ->getCanonicalDecl();
11613           Pos = ParamPositions[PVD];
11614           ParmTy = PVD->getType();
11615         }
11616         ParamAttrs[Pos].Alignment =
11617             (*NI)
11618                 ? (*NI)->EvaluateKnownConstInt(C)
11619                 : llvm::APSInt::getUnsigned(
11620                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11621                           .getQuantity());
11622         ++NI;
11623       }
11624       // Mark linear parameters.
11625       auto SI = Attr->steps_begin();
11626       auto MI = Attr->modifiers_begin();
11627       for (const Expr *E : Attr->linears()) {
11628         E = E->IgnoreParenImpCasts();
11629         unsigned Pos;
11630         // Rescaling factor needed to compute the linear parameter
11631         // value in the mangled name.
11632         unsigned PtrRescalingFactor = 1;
11633         if (isa<CXXThisExpr>(E)) {
11634           Pos = ParamPositions[FD];
11635         } else {
11636           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11637                                 ->getCanonicalDecl();
11638           Pos = ParamPositions[PVD];
11639           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11640             PtrRescalingFactor = CGM.getContext()
11641                                      .getTypeSizeInChars(P->getPointeeType())
11642                                      .getQuantity();
11643         }
11644         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11645         ParamAttr.Kind = Linear;
11646         // Assuming a stride of 1, for `linear` without modifiers.
11647         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11648         if (*SI) {
11649           Expr::EvalResult Result;
11650           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11651             if (const auto *DRE =
11652                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11653               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11654                 ParamAttr.Kind = LinearWithVarStride;
11655                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11656                     ParamPositions[StridePVD->getCanonicalDecl()]);
11657               }
11658             }
11659           } else {
11660             ParamAttr.StrideOrArg = Result.Val.getInt();
11661           }
11662         }
11663         // If we are using a linear clause on a pointer, we need to
11664         // rescale the value of linear_step with the byte size of the
11665         // pointee type.
11666         if (Linear == ParamAttr.Kind)
11667           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11668         ++SI;
11669         ++MI;
11670       }
11671       llvm::APSInt VLENVal;
11672       SourceLocation ExprLoc;
11673       const Expr *VLENExpr = Attr->getSimdlen();
11674       if (VLENExpr) {
11675         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11676         ExprLoc = VLENExpr->getExprLoc();
11677       }
11678       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11679       if (CGM.getTriple().isX86()) {
11680         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11681       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11682         unsigned VLEN = VLENVal.getExtValue();
11683         StringRef MangledName = Fn->getName();
11684         if (CGM.getTarget().hasFeature("sve"))
11685           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11686                                          MangledName, 's', 128, Fn, ExprLoc);
11687         if (CGM.getTarget().hasFeature("neon"))
11688           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11689                                          MangledName, 'n', 128, Fn, ExprLoc);
11690       }
11691     }
11692     FD = FD->getPreviousDecl();
11693   }
11694 }
11695 
11696 namespace {
11697 /// Cleanup action for doacross support.
11698 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11699 public:
11700   static const int DoacrossFinArgs = 2;
11701 
11702 private:
11703   llvm::FunctionCallee RTLFn;
11704   llvm::Value *Args[DoacrossFinArgs];
11705 
11706 public:
11707   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11708                     ArrayRef<llvm::Value *> CallArgs)
11709       : RTLFn(RTLFn) {
11710     assert(CallArgs.size() == DoacrossFinArgs);
11711     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11712   }
11713   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11714     if (!CGF.HaveInsertPoint())
11715       return;
11716     CGF.EmitRuntimeCall(RTLFn, Args);
11717   }
11718 };
11719 } // namespace
11720 
// Emits the initialization of a doacross loop nest: builds a zero-initialized
// array of `kmp_dim` records (one per entry of `NumIterations`), stores the
// iteration count into each record's upper bound and 1 into its stride, calls
// __kmpc_doacross_init with that array, and pushes a cleanup that calls
// __kmpc_doacross_fini. Does nothing when there is no insertion point.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // kmp_dim dims[<number of loops>];
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero the whole array; the lower bound field is never written explicitly
  // below.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Indices of the kmp_dim fields, in declaration order.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini(loc, gtid) call as a cleanup
  // for both normal and exceptional exits, using the directive's end
  // location.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11791 
11792 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11793                                           const OMPDependClause *C) {
11794   QualType Int64Ty =
11795       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11796   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11797   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11798       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11799   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11800   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11801     const Expr *CounterVal = C->getLoopData(I);
11802     assert(CounterVal);
11803     llvm::Value *CntVal = CGF.EmitScalarConversion(
11804         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11805         CounterVal->getExprLoc());
11806     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11807                           /*Volatile=*/false, Int64Ty);
11808   }
11809   llvm::Value *Args[] = {
11810       emitUpdateLocation(CGF, C->getBeginLoc()),
11811       getThreadID(CGF, C->getBeginLoc()),
11812       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11813   llvm::FunctionCallee RTLFn;
11814   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11815     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11816                                                   OMPRTL___kmpc_doacross_post);
11817   } else {
11818     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11819     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11820                                                   OMPRTL___kmpc_doacross_wait);
11821   }
11822   CGF.EmitRuntimeCall(RTLFn, Args);
11823 }
11824 
11825 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11826                                llvm::FunctionCallee Callee,
11827                                ArrayRef<llvm::Value *> Args) const {
11828   assert(Loc.isValid() && "Outlined function call location must be valid.");
11829   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11830 
11831   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11832     if (Fn->doesNotThrow()) {
11833       CGF.EmitNounwindRuntimeCall(Fn, Args);
11834       return;
11835     }
11836   }
11837   CGF.EmitRuntimeCall(Callee, Args);
11838 }
11839 
// Emits a call to the outlined function `OutlinedFn` with `Args`. This
// implementation simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11845 
11846 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11847   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11848     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11849       HasEmittedDeclareTargetRegion = true;
11850 }
11851 
// Returns the address of the local variable for `NativeParam` in `CGF`.
// `TargetParam` is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11857 
// Returns the address to use for the local variable `VD`, or an invalid
// address when no special handling applies.
//
// If the current function has untied-task data recorded for `VD`, the
// recorded address pair is picked up first. If the canonical declaration
// carries an OMPAllocateDeclAttr and isAllocatableDecl(VD) holds, storage is
// obtained through a call to __kmpc_alloc and a cleanup calling __kmpc_free
// is pushed; the resulting address (or the recorded "real" untied address,
// when valid) is returned. In every other case the recorded untied address
// is returned, which is invalid when nothing was recorded.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the untied-task addresses recorded for VD in the current
  // function, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to the declared alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    // Arguments of the __kmpc_alloc call: (gtid, size, allocator).
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied-task slot was recorded for the variable, store the
    // allocated pointer into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(gtid, addr, allocator), unless there is no insertion
      // point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the recorded "real" untied address when there is one; otherwise
    // use the freshly allocated storage with the declared alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For variables tracked in an untied task, emit the untied switch of the
    // enclosing OpenMP region, if the current captured statement is one.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11960 
11961 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11962                                              const VarDecl *VD) const {
11963   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11964   if (It == FunctionToUntiedTaskStackMap.end())
11965     return false;
11966   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11967 }
11968 
11969 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11970     CodeGenModule &CGM, const OMPLoopDirective &S)
11971     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11972   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11973   if (!NeedToPush)
11974     return;
11975   NontemporalDeclsSet &DS =
11976       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11977   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11978     for (const Stmt *Ref : C->private_refs()) {
11979       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11980       const ValueDecl *VD;
11981       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11982         VD = DRE->getDecl();
11983       } else {
11984         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11985         assert((ME->isImplicitCXXThis() ||
11986                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11987                "Expected member of current class.");
11988         VD = ME->getMemberDecl();
11989       }
11990       DS.insert(VD);
11991     }
11992   }
11993 }
11994 
11995 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11996   if (!NeedToPush)
11997     return;
11998   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11999 }
12000 
12001 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12002     CodeGenFunction &CGF,
12003     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
12004                          std::pair<Address, Address>> &LocalVars)
12005     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12006   if (!NeedToPush)
12007     return;
12008   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12009       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12010   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12011 }
12012 
12013 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12014   if (!NeedToPush)
12015     return;
12016   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12017 }
12018 
12019 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12020   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12021 
12022   return llvm::any_of(
12023       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12024       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12025 }
12026 
12027 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12028     const OMPExecutableDirective &S,
12029     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12030     const {
12031   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12032   // Vars in target/task regions must be excluded completely.
12033   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12034       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12035     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12036     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12037     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12038     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12039       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12040         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12041     }
12042   }
12043   // Exclude vars in private clauses.
12044   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12045     for (const Expr *Ref : C->varlists()) {
12046       if (!Ref->getType()->isScalarType())
12047         continue;
12048       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12049       if (!DRE)
12050         continue;
12051       NeedToCheckForLPCs.insert(DRE->getDecl());
12052     }
12053   }
12054   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12055     for (const Expr *Ref : C->varlists()) {
12056       if (!Ref->getType()->isScalarType())
12057         continue;
12058       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12059       if (!DRE)
12060         continue;
12061       NeedToCheckForLPCs.insert(DRE->getDecl());
12062     }
12063   }
12064   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12065     for (const Expr *Ref : C->varlists()) {
12066       if (!Ref->getType()->isScalarType())
12067         continue;
12068       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12069       if (!DRE)
12070         continue;
12071       NeedToCheckForLPCs.insert(DRE->getDecl());
12072     }
12073   }
12074   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12075     for (const Expr *Ref : C->varlists()) {
12076       if (!Ref->getType()->isScalarType())
12077         continue;
12078       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12079       if (!DRE)
12080         continue;
12081       NeedToCheckForLPCs.insert(DRE->getDecl());
12082     }
12083   }
12084   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12085     for (const Expr *Ref : C->varlists()) {
12086       if (!Ref->getType()->isScalarType())
12087         continue;
12088       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12089       if (!DRE)
12090         continue;
12091       NeedToCheckForLPCs.insert(DRE->getDecl());
12092     }
12093   }
12094   for (const Decl *VD : NeedToCheckForLPCs) {
12095     for (const LastprivateConditionalData &Data :
12096          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12097       if (Data.DeclToUniqueName.count(VD) > 0) {
12098         if (!Data.Disabled)
12099           NeedToAddForLPCsAsDisabled.insert(VD);
12100         break;
12101       }
12102     }
12103   }
12104 }
12105 
12106 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12107     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12108     : CGM(CGF.CGM),
12109       Action((CGM.getLangOpts().OpenMP >= 50 &&
12110               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12111                            [](const OMPLastprivateClause *C) {
12112                              return C->getKind() ==
12113                                     OMPC_LASTPRIVATE_conditional;
12114                            }))
12115                  ? ActionToDo::PushAsLastprivateConditional
12116                  : ActionToDo::DoNotPush) {
12117   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12118   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12119     return;
12120   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12121          "Expected a push action.");
12122   LastprivateConditionalData &Data =
12123       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12124   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12125     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12126       continue;
12127 
12128     for (const Expr *Ref : C->varlists()) {
12129       Data.DeclToUniqueName.insert(std::make_pair(
12130           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12131           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12132     }
12133   }
12134   Data.IVLVal = IVLVal;
12135   Data.Fn = CGF.CurFn;
12136 }
12137 
12138 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12139     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12140     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12141   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12142   if (CGM.getLangOpts().OpenMP < 50)
12143     return;
12144   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12145   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12146   if (!NeedToAddForLPCsAsDisabled.empty()) {
12147     Action = ActionToDo::DisableLastprivateConditional;
12148     LastprivateConditionalData &Data =
12149         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12150     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12151       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12152     Data.Fn = CGF.CurFn;
12153     Data.Disabled = true;
12154   }
12155 }
12156 
/// Factory for the disabling form of the guard: returns an object built with
/// the two-argument (CGF, S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12162 
12163 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12164   if (CGM.getLangOpts().OpenMP < 50)
12165     return;
12166   if (Action == ActionToDo::DisableLastprivateConditional) {
12167     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12168            "Expected list of disabled private vars.");
12169     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12170   }
12171   if (Action == ActionToDo::PushAsLastprivateConditional) {
12172     assert(
12173         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12174         "Expected list of lastprivate conditional vars.");
12175     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12176   }
12177 }
12178 
12179 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12180                                                         const VarDecl *VD) {
12181   ASTContext &C = CGM.getContext();
12182   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12183   if (I == LastprivateConditionalToTypes.end())
12184     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12185   QualType NewType;
12186   const FieldDecl *VDField;
12187   const FieldDecl *FiredField;
12188   LValue BaseLVal;
12189   auto VI = I->getSecond().find(VD);
12190   if (VI == I->getSecond().end()) {
12191     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12192     RD->startDefinition();
12193     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12194     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12195     RD->completeDefinition();
12196     NewType = C.getRecordType(RD);
12197     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12198     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12199     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12200   } else {
12201     NewType = std::get<0>(VI->getSecond());
12202     VDField = std::get<1>(VI->getSecond());
12203     FiredField = std::get<2>(VI->getSecond());
12204     BaseLVal = std::get<3>(VI->getSecond());
12205   }
12206   LValue FiredLVal =
12207       CGF.EmitLValueForField(BaseLVal, FiredField);
12208   CGF.EmitStoreOfScalar(
12209       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12210       FiredLVal);
12211   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12212 }
12213 
12214 namespace {
12215 /// Checks if the lastprivate conditional variable is referenced in LHS.
12216 class LastprivateConditionalRefChecker final
12217     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12218   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12219   const Expr *FoundE = nullptr;
12220   const Decl *FoundD = nullptr;
12221   StringRef UniqueDeclName;
12222   LValue IVLVal;
12223   llvm::Function *FoundFn = nullptr;
12224   SourceLocation Loc;
12225 
12226 public:
12227   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12228     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12229          llvm::reverse(LPM)) {
12230       auto It = D.DeclToUniqueName.find(E->getDecl());
12231       if (It == D.DeclToUniqueName.end())
12232         continue;
12233       if (D.Disabled)
12234         return false;
12235       FoundE = E;
12236       FoundD = E->getDecl()->getCanonicalDecl();
12237       UniqueDeclName = It->second;
12238       IVLVal = D.IVLVal;
12239       FoundFn = D.Fn;
12240       break;
12241     }
12242     return FoundE == E;
12243   }
12244   bool VisitMemberExpr(const MemberExpr *E) {
12245     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12246       return false;
12247     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12248          llvm::reverse(LPM)) {
12249       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12250       if (It == D.DeclToUniqueName.end())
12251         continue;
12252       if (D.Disabled)
12253         return false;
12254       FoundE = E;
12255       FoundD = E->getMemberDecl()->getCanonicalDecl();
12256       UniqueDeclName = It->second;
12257       IVLVal = D.IVLVal;
12258       FoundFn = D.Fn;
12259       break;
12260     }
12261     return FoundE == E;
12262   }
12263   bool VisitStmt(const Stmt *S) {
12264     for (const Stmt *Child : S->children()) {
12265       if (!Child)
12266         continue;
12267       if (const auto *E = dyn_cast<Expr>(Child))
12268         if (!E->isGLValue())
12269           continue;
12270       if (Visit(Child))
12271         return true;
12272     }
12273     return false;
12274   }
12275   explicit LastprivateConditionalRefChecker(
12276       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12277       : LPM(LPM) {}
12278   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12279   getFoundData() const {
12280     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12281   }
12282 };
12283 } // namespace
12284 
12285 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12286                                                        LValue IVLVal,
12287                                                        StringRef UniqueDeclName,
12288                                                        LValue LVal,
12289                                                        SourceLocation Loc) {
12290   // Last updated loop counter for the lastprivate conditional var.
12291   // int<xx> last_iv = 0;
12292   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12293   llvm::Constant *LastIV =
12294       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12295   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12296       IVLVal.getAlignment().getAsAlign());
12297   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12298 
12299   // Last value of the lastprivate conditional.
12300   // decltype(priv_a) last_a;
12301   llvm::Constant *Last = getOrCreateInternalVariable(
12302       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12303   cast<llvm::GlobalVariable>(Last)->setAlignment(
12304       LVal.getAlignment().getAsAlign());
12305   LValue LastLVal =
12306       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12307 
12308   // Global loop counter. Required to handle inner parallel-for regions.
12309   // iv
12310   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12311 
12312   // #pragma omp critical(a)
12313   // if (last_iv <= iv) {
12314   //   last_iv = iv;
12315   //   last_a = priv_a;
12316   // }
12317   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12318                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12319     Action.Enter(CGF);
12320     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12321     // (last_iv <= iv) ? Check if the variable is updated and store new
12322     // value in global var.
12323     llvm::Value *CmpRes;
12324     if (IVLVal.getType()->isSignedIntegerType()) {
12325       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12326     } else {
12327       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12328              "Loop iteration variable must be integer.");
12329       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12330     }
12331     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12332     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12333     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12334     // {
12335     CGF.EmitBlock(ThenBB);
12336 
12337     //   last_iv = iv;
12338     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12339 
12340     //   last_a = priv_a;
12341     switch (CGF.getEvaluationKind(LVal.getType())) {
12342     case TEK_Scalar: {
12343       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12344       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12345       break;
12346     }
12347     case TEK_Complex: {
12348       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12349       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12350       break;
12351     }
12352     case TEK_Aggregate:
12353       llvm_unreachable(
12354           "Aggregates are not supported in lastprivate conditional.");
12355     }
12356     // }
12357     CGF.EmitBranch(ExitBB);
12358     // There is no need to emit line number for unconditional branch.
12359     (void)ApplyDebugLocation::CreateEmpty(CGF);
12360     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12361   };
12362 
12363   if (CGM.getLangOpts().OpenMPSimd) {
12364     // Do not emit as a critical region as no parallel region could be emitted.
12365     RegionCodeGenTy ThenRCG(CodeGen);
12366     ThenRCG(CGF);
12367   } else {
12368     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12369   }
12370 }
12371 
12372 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12373                                                          const Expr *LHS) {
12374   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12375     return;
12376   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12377   if (!Checker.Visit(LHS))
12378     return;
12379   const Expr *FoundE;
12380   const Decl *FoundD;
12381   StringRef UniqueDeclName;
12382   LValue IVLVal;
12383   llvm::Function *FoundFn;
12384   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12385       Checker.getFoundData();
12386   if (FoundFn != CGF.CurFn) {
12387     // Special codegen for inner parallel regions.
12388     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12389     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12390     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12391            "Lastprivate conditional is not found in outer region.");
12392     QualType StructTy = std::get<0>(It->getSecond());
12393     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12394     LValue PrivLVal = CGF.EmitLValue(FoundE);
12395     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12396         PrivLVal.getAddress(CGF),
12397         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12398     LValue BaseLVal =
12399         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12400     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12401     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12402                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12403                         FiredLVal, llvm::AtomicOrdering::Unordered,
12404                         /*IsVolatile=*/true, /*isInit=*/false);
12405     return;
12406   }
12407 
12408   // Private address of the lastprivate conditional in the current context.
12409   // priv_a
12410   LValue LVal = CGF.EmitLValue(FoundE);
12411   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12412                                    FoundE->getExprLoc());
12413 }
12414 
/// Emits, after directive \p D, the deferred lastprivate conditional updates
/// for variables whose Fired flag may have been set inside \p D.
///
/// Only the innermost non-disabled stack entry is considered, and only if it
/// belongs to the function being emitted. For each of its variables that is
/// captured by the last capture region of \p D and is not in \p IgnoredDecls,
/// the Fired field of the variable's record is loaded and, if non-zero, the
/// regular conditional update is emitted for the variable.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Find the innermost entry that is not a "disabled" marker.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    // Skip variables the region does not capture or the caller excluded.
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    // For a reference-typed variable the referenced object is updated.
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    // Keep the guard alive while the closing block is emitted.
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
12464 
12465 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12466     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12467     SourceLocation Loc) {
12468   if (CGF.getLangOpts().OpenMP < 50)
12469     return;
12470   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12471   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12472          "Unknown lastprivate conditional variable.");
12473   StringRef UniqueName = It->second;
12474   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12475   // The variable was not updated in the region - exit.
12476   if (!GV)
12477     return;
12478   LValue LPLVal = CGF.MakeAddrLValue(
12479       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12480   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12481   CGF.EmitStoreOfScalar(Res, PrivLVal);
12482 }
12483 
/// SIMD-only stub for emitParallelOutlinedFunction: ignores its arguments and
/// unconditionally reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12489 
/// SIMD-only stub for emitTeamsOutlinedFunction: ignores its arguments and
/// unconditionally reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12495 
/// SIMD-only stub for emitTaskOutlinedFunction: ignores its arguments
/// (\p NumberOfParts is not written) and unconditionally reaches
/// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12503 
/// SIMD-only stub for emitParallelCall: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12511 
/// SIMD-only stub for emitCriticalRegion: \p CriticalOpGen is never invoked;
/// the function unconditionally reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12518 
/// SIMD-only stub for emitMasterRegion: \p MasterOpGen is never invoked; the
/// function unconditionally reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12524 
/// SIMD-only stub for emitTaskyieldCall: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12529 
/// SIMD-only stub for emitTaskgroupRegion: \p TaskgroupOpGen is never invoked;
/// the function unconditionally reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12535 
/// SIMD-only stub for emitSingleRegion: \p SingleOpGen is never invoked and
/// the copyprivate lists are ignored; the function unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12543 
/// SIMD-only stub for emitOrderedRegion: \p OrderedOpGen is never invoked; the
/// function unconditionally reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12550 
/// SIMD-only stub for emitBarrierCall: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12558 
/// SIMD-only stub for emitForDispatchInit: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12565 
/// SIMD-only stub for emitForStaticInit: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12571 
/// SIMD-only stub for emitDistributeStaticInit: emits nothing and
/// unconditionally reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12577 
/// SIMD-only stub for emitForOrderedIterationEnd: emits nothing and
/// unconditionally reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12584 
/// SIMD-only stub for emitForStaticFinish: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12590 
/// SIMD-only stub for emitForNext: produces no value and unconditionally
/// reaches llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12598 
/// SIMD-only stub for emitNumThreadsClause: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12604 
/// SIMD-only stub for emitProcBindClause: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12610 
/// SIMD-only stub for getAddrOfThreadPrivate: produces no address and
/// unconditionally reaches llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12617 
/// SIMD-only stub for emitThreadPrivateVarDefinition: produces no function and
/// unconditionally reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12623 
/// SIMD-only stub for getAddrOfArtificialThreadPrivate: produces no address
/// and unconditionally reaches llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12628 
/// SIMD-only stub for emitFlush: emits nothing and unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12635 
/// SIMD-only stub for emitTaskCall: emits nothing and unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12644 
/// SIMD-only stub for emitTaskLoopCall: emits nothing and unconditionally
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12651 
/// Reduction emission for the SIMD-only runtime. Asserts that
/// \p Options.SimpleReduction is set and then forwards all arguments unchanged
/// to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12660 
/// SIMD-only stub for task reduction initialization.
///
/// Unconditionally hits llvm_unreachable; all arguments are ignored and no
/// llvm::Value is ever returned.
12661 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12662     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12663     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12664   llvm_unreachable("Not supported in SIMD-only mode");
12665 }
12666 
/// SIMD-only stub for task reduction finalization.
///
/// Unconditionally hits llvm_unreachable; \p CGF, \p Loc and
/// \p IsWorksharingReduction are ignored and nothing is emitted.
12667 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12668                                                 SourceLocation Loc,
12669                                                 bool IsWorksharingReduction) {
12670   llvm_unreachable("Not supported in SIMD-only mode");
12671 }
12672 
/// SIMD-only stub for task reduction fixups.
///
/// Unconditionally hits llvm_unreachable; \p RCG is not modified and the
/// item index \p N is ignored.
12673 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12674                                                   SourceLocation Loc,
12675                                                   ReductionCodeGen &RCG,
12676                                                   unsigned N) {
12677   llvm_unreachable("Not supported in SIMD-only mode");
12678 }
12679 
/// SIMD-only stub for fetching a task reduction item.
///
/// Unconditionally hits llvm_unreachable; \p ReductionsPtr and \p SharedLVal
/// are ignored and no Address is ever returned.
12680 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12681                                                   SourceLocation Loc,
12682                                                   llvm::Value *ReductionsPtr,
12683                                                   LValue SharedLVal) {
12684   llvm_unreachable("Not supported in SIMD-only mode");
12685 }
12686 
/// SIMD-only stub for taskwait call emission.
///
/// Unconditionally hits llvm_unreachable; both arguments are ignored and
/// nothing is emitted.
12687 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12688                                            SourceLocation Loc) {
12689   llvm_unreachable("Not supported in SIMD-only mode");
12690 }
12691 
/// SIMD-only stub for cancellation point emission.
///
/// Unconditionally hits llvm_unreachable; \p CancelRegion is ignored and
/// nothing is emitted.
12692 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12693     CodeGenFunction &CGF, SourceLocation Loc,
12694     OpenMPDirectiveKind CancelRegion) {
12695   llvm_unreachable("Not supported in SIMD-only mode");
12696 }
12697 
/// SIMD-only stub for cancel call emission.
///
/// Unconditionally hits llvm_unreachable; \p IfCond and \p CancelRegion are
/// ignored and nothing is emitted.
12698 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12699                                          SourceLocation Loc, const Expr *IfCond,
12700                                          OpenMPDirectiveKind CancelRegion) {
12701   llvm_unreachable("Not supported in SIMD-only mode");
12702 }
12703 
/// SIMD-only stub for outlining a target region.
///
/// Unconditionally hits llvm_unreachable. The reference out-parameters
/// \p OutlinedFn and \p OutlinedFnID are never written, and \p CodeGen is
/// never invoked.
12704 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12705     const OMPExecutableDirective &D, StringRef ParentName,
12706     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12707     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12708   llvm_unreachable("Not supported in SIMD-only mode");
12709 }
12710 
/// SIMD-only stub for target call emission.
///
/// Unconditionally hits llvm_unreachable; the outlined function, its ID, the
/// if-condition and \p Device are ignored, and the \p SizeEmitter callback is
/// never invoked.
12711 void CGOpenMPSIMDRuntime::emitTargetCall(
12712     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12713     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12714     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12715     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12716                                      const OMPLoopDirective &D)>
12717         SizeEmitter) {
12718   llvm_unreachable("Not supported in SIMD-only mode");
12719 }
12720 
/// SIMD-only stub for target function emission.
///
/// Unconditionally hits llvm_unreachable; \p GD is ignored and no bool is
/// ever returned.
12721 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12722   llvm_unreachable("Not supported in SIMD-only mode");
12723 }
12724 
/// SIMD-only stub for target global variable emission.
///
/// Unconditionally hits llvm_unreachable; \p GD is ignored and no bool is
/// ever returned.
12725 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12726   llvm_unreachable("Not supported in SIMD-only mode");
12727 }
12728 
/// Target-global hook for the SIMD-only runtime.
///
/// Always returns false without inspecting \p GD. Unlike the neighbouring
/// emitTargetFunctions / emitTargetGlobalVariable overrides, this one is not
/// marked unreachable, so it can be called.
/// NOTE(review): false presumably tells the caller that the declaration was
/// not handled here; confirm against the base-class contract.
12729 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12730   return false;
12731 }
12732 
/// SIMD-only stub for teams call emission.
///
/// Unconditionally hits llvm_unreachable; \p OutlinedFn and \p CapturedVars
/// are ignored and nothing is emitted.
12733 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12734                                         const OMPExecutableDirective &D,
12735                                         SourceLocation Loc,
12736                                         llvm::Function *OutlinedFn,
12737                                         ArrayRef<llvm::Value *> CapturedVars) {
12738   llvm_unreachable("Not supported in SIMD-only mode");
12739 }
12740 
/// SIMD-only stub for num_teams clause emission.
///
/// Unconditionally hits llvm_unreachable; \p NumTeams and \p ThreadLimit are
/// never evaluated and nothing is emitted.
12741 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12742                                              const Expr *NumTeams,
12743                                              const Expr *ThreadLimit,
12744                                              SourceLocation Loc) {
12745   llvm_unreachable("Not supported in SIMD-only mode");
12746 }
12747 
/// SIMD-only stub for target data call emission.
///
/// Unconditionally hits llvm_unreachable; \p CodeGen is never invoked and
/// \p Info is never written.
12748 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12749     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12750     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12751   llvm_unreachable("Not supported in SIMD-only mode");
12752 }
12753 
/// SIMD-only stub for standalone target data call emission.
///
/// Unconditionally hits llvm_unreachable; \p IfCond and \p Device are never
/// evaluated and nothing is emitted.
12754 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12755     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12756     const Expr *Device) {
12757   llvm_unreachable("Not supported in SIMD-only mode");
12758 }
12759 
/// SIMD-only stub for doacross initialization.
///
/// Unconditionally hits llvm_unreachable; \p D and \p NumIterations are
/// ignored and nothing is emitted.
12760 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12761                                            const OMPLoopDirective &D,
12762                                            ArrayRef<Expr *> NumIterations) {
12763   llvm_unreachable("Not supported in SIMD-only mode");
12764 }
12765 
/// SIMD-only stub for doacross ordered emission.
///
/// Unconditionally hits llvm_unreachable; the depend clause \p C is ignored
/// and nothing is emitted.
12766 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12767                                               const OMPDependClause *C) {
12768   llvm_unreachable("Not supported in SIMD-only mode");
12769 }
12770 
/// SIMD-only stub for parameter translation.
///
/// Unconditionally hits llvm_unreachable; \p FD and \p NativeParam are
/// ignored and no VarDecl is ever returned.
12771 const VarDecl *
12772 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12773                                         const VarDecl *NativeParam) const {
12774   llvm_unreachable("Not supported in SIMD-only mode");
12775 }
12776 
/// SIMD-only stub for resolving a parameter address.
///
/// Unconditionally hits llvm_unreachable; \p NativeParam and \p TargetParam
/// are ignored and no Address is ever returned.
12777 Address
12778 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12779                                          const VarDecl *NativeParam,
12780                                          const VarDecl *TargetParam) const {
12781   llvm_unreachable("Not supported in SIMD-only mode");
12782 }
12783