1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
443 /// Values for bit flags used in the ident_t to describe the fields.
444 /// All enumeric elements are named and described in accordance with the code
445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
446 enum OpenMPLocationFlags : unsigned {
447   /// Use trampoline for internal microtask.
448   OMP_IDENT_IMD = 0x01,
449   /// Use c-style ident structure.
450   OMP_IDENT_KMPC = 0x02,
451   /// Atomic reduction option for kmpc_reduce.
452   OMP_ATOMIC_REDUCE = 0x10,
453   /// Explicit 'barrier' directive.
454   OMP_IDENT_BARRIER_EXPL = 0x20,
455   /// Implicit barrier in code.
456   OMP_IDENT_BARRIER_IMPL = 0x40,
457   /// Implicit barrier in 'for' directive.
458   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459   /// Implicit barrier in 'sections' directive.
460   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461   /// Implicit barrier in 'single' directive.
462   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463   /// Call of __kmp_for_static_init for static loop.
464   OMP_IDENT_WORK_LOOP = 0x200,
465   /// Call of __kmp_for_static_init for sections.
466   OMP_IDENT_WORK_SECTIONS = 0x400,
467   /// Call of __kmp_for_static_init for distribute.
468   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470 };
471 
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474 /// Values for bit flags for marking which requires clauses have been used.
475 enum OpenMPOffloadingRequiresDirFlags : int64_t {
476   /// flag undefined.
477   OMP_REQ_UNDEFINED               = 0x000,
478   /// no requires clause present.
479   OMP_REQ_NONE                    = 0x001,
480   /// reverse_offload clause.
481   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482   /// unified_address clause.
483   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484   /// unified_shared_memory clause.
485   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486   /// dynamic_allocators clause.
487   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489 };
490 
491 enum OpenMPOffloadingReservedDeviceIDs {
492   /// Device ID if the device was not defined, runtime should get it
493   /// from environment variables in the spec.
494   OMP_DEVICEID_UNDEF = -1,
495 };
496 } // anonymous namespace
497 
/// Indices of the fields of the ident structure, which describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. It is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538 
/// Schedule types for 'omp for' loops. The enumerators mirror the enum
/// sched_type in kmp.h, so the numeric values must not be changed.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
663 /// Emit initialization of arrays of complex types.
664 /// \param DestAddr Address of the array.
665 /// \param Type Type of array.
666 /// \param Init Initial expression of array.
667 /// \param SrcAddr Address of the original array.
668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669                                  QualType Type, bool EmitDeclareReductionInit,
670                                  const Expr *Init,
671                                  const OMPDeclareReductionDecl *DRD,
672                                  Address SrcAddr = Address::invalid()) {
673   // Perform element-by-element initialization.
674   QualType ElementTy;
675 
676   // Drill down to the base element type on both arrays.
677   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679   DestAddr =
680       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681   if (DRD)
682     SrcAddr =
683         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684 
685   llvm::Value *SrcBegin = nullptr;
686   if (DRD)
687     SrcBegin = SrcAddr.getPointer();
688   llvm::Value *DestBegin = DestAddr.getPointer();
689   // Cast from pointer to array type to pointer to single element.
690   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691   // The basic structure here is a while-do loop.
692   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
694   llvm::Value *IsEmpty =
695       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697 
698   // Enter the loop body, making that address the current address.
699   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700   CGF.EmitBlock(BodyBB);
701 
702   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703 
704   llvm::PHINode *SrcElementPHI = nullptr;
705   Address SrcElementCurrent = Address::invalid();
706   if (DRD) {
707     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708                                           "omp.arraycpy.srcElementPast");
709     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710     SrcElementCurrent =
711         Address(SrcElementPHI,
712                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713   }
714   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716   DestElementPHI->addIncoming(DestBegin, EntryBB);
717   Address DestElementCurrent =
718       Address(DestElementPHI,
719               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720 
721   // Emit copy.
722   {
723     CodeGenFunction::RunCleanupsScope InitScope(CGF);
724     if (EmitDeclareReductionInit) {
725       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726                                        SrcElementCurrent, ElementTy);
727     } else
728       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729                            /*IsInitializer=*/false);
730   }
731 
732   if (DRD) {
733     // Shift the address forward by one element.
734     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737   }
738 
739   // Shift the address forward by one element.
740   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742   // Check whether we've reached the end.
743   llvm::Value *Done =
744       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747 
748   // Done.
749   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750 }
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
/// Emit the initialization of the private copy for reduction item \p N.
///
/// \p PrivateAddr is first recast to the memory type of the private variable,
/// and \p SharedLVal is rebuilt with the type, base info and TBAA info taken
/// from the recorded shared lvalue of item \p N. One of three strategies is
/// then selected (see the comments in each branch). \p DefaultInit is a
/// caller-provided callback; its boolean result is only consulted in the last
/// branch.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed private copy: DefaultInit runs first (result ignored) only
    // when the declare-reduction has an initializer; the aggregate helper then
    // performs the initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item with a declare-reduction that either has an initializer
    // or whose private variable has no initializer of its own: run DefaultInit
    // (result ignored) and then the reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit is always invoked when this condition is evaluated. The
    // private variable's own non-trivial initializer is emitted only if
    // DefaultInit returned false.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
/// Starting from \p BaseLV of (non-reference) type \p BaseTy, load through
/// successive pointer/reference levels until the current type is neither a
/// pointer nor a reference, or is the same type as \p ElTy.
///
/// \returns an lvalue whose address is the last loaded address recast to the
/// memory type of \p ElTy; its type and base info are those of the last
/// loaded lvalue, and its TBAA info is derived from that same lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointee lvalue.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: rewrap the address with the reference type and load
      // the referenced lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    // Step one indirection level down.
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
944 
/// Wrap \p Addr in as many levels of indirection as \p BaseTy has
/// pointer/reference levels above \p ElTy.
///
/// For each such level a memory temporary of the current type is created and
/// its pointer is stored into the temporary created for the previous level,
/// forming a chain. \p Addr is then cast to the element type of the innermost
/// temporary and stored there, and the outermost temporary is returned. When
/// no level exists (no temporary was created), \p Addr is cast to
/// \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: most recently created temporary (innermost so far).
  // TopTmp: temporary of the previous level, which must point at Tmp.
  // MostTopTmp: the first (outermost) temporary; this is what gets returned.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Pick the destination pointer type: the innermost temporary's element type
  // if any temporary exists, otherwise the caller-provided base type.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
/// Construct the runtime helper for module \p CGM.
///
/// \p FirstSeparator and \p Separator are stored as-is; getName() uses them
/// to join name parts. The OpenMP IR builder is created on the LLVM module
/// and the offload entries manager on \p CGM.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Runs after the IR builder has been initialized.
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emit the outlined helper function for a user-defined reduction combiner
/// (\p IsCombiner true) or initializer (\p IsCombiner false).
///
/// The function has internal linkage, returns void and takes two
/// restrict-qualified pointers to \p Ty: the "out" pointer first, then the
/// "in" pointer. Inside the function \p In and \p Out are privatized to the
/// pointees of those parameters. For an initializer whose \p Out variable has
/// a non-trivial initializer, that initializer is emitted first; then
/// \p CombinerInitializer is emitted if it is non-null.
///
/// \returns the newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order: out first, then in.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace noinline/optnone with alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer only: emit the non-trivial initializer of the "out" variable
  // into its (privatized) storage before the initializer expression itself.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The combiner/initializer expression is evaluated for its side effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1294     bool Tied, unsigned &NumberOfParts) {
1295   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1296                                               PrePostActionTy &) {
1297     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1298     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1299     llvm::Value *TaskArgs[] = {
1300         UpLoc, ThreadID,
1301         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1302                                     TaskTVar->getType()->castAs<PointerType>())
1303             .getPointer(CGF)};
1304     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1305                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1306                         TaskArgs);
1307   };
1308   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1309                                                             UntiedCodeGen);
1310   CodeGen.setAction(Action);
1311   assert(!ThreadIDVar->getType()->isPointerType() &&
1312          "thread id variable must be of type kmp_int32 for tasks");
1313   const OpenMPDirectiveKind Region =
1314       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1315                                                       : OMPD_task;
1316   const CapturedStmt *CS = D.getCapturedStmt(Region);
1317   bool HasCancel = false;
1318   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1319     HasCancel = TD->hasCancel();
1320   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1321     HasCancel = TD->hasCancel();
1322   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1323     HasCancel = TD->hasCancel();
1324   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1325     HasCancel = TD->hasCancel();
1326 
1327   CodeGenFunction CGF(CGM, true);
1328   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1329                                         InnermostKind, HasCancel, Action);
1330   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1331   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1332   if (!Tied)
1333     NumberOfParts = Action.getNumberOfParts();
1334   return Res;
1335 }
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Return the value holding the OpenMP global thread id for use at the current
/// insertion point of \p CGF.
///
/// The sources are tried in this order:
///  1. If LangOpts.OpenMPIRBuilder is set, the OpenMPIRBuilder produces the
///     thread id and nothing below is used.
///  2. A thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap.
///  3. Inside an OpenMP region that has a thread id variable, a load from that
///     variable, provided the checks below allow it. The load is cached only
///     when it is emitted in the block that holds CGF.AllocaInsertPt.
///  4. Otherwise a call to __kmpc_global_thread_num emitted at the function's
///     service insertion point; the call is cached for the whole function.
///
/// \param Loc source location used for the emitted load or runtime call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that contains the alloca insertion point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable if any of the following holds:
      //  - no landing pad is required, or (C++) exceptions are disabled;
      //  - we are currently emitting into TopBlock;
      //  - the variable's pointer is not an instruction, or it is an
      //    instruction located in TopBlock or in the current block.
      // Otherwise fall through to the runtime call below.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // Either this is not an outlined function region, or loading the thread id
  // variable was rejected above - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; the guard restores the
  // builder's previous insertion point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
/// Emit the helper functions and the runtime registration needed for the
/// definition of the threadprivate variable \p VD stored at \p VDAddr.
///
/// Nothing is emitted (and nullptr is returned) when TLS is used, when \p VD
/// has no definition, when the definition was already handled, or when neither
/// a constructor nor a destructor helper is required.
///
/// \param PerformInit if true (and compiling C++), a constructor helper that
///        re-emits the variable's initializer is generated.
/// \param CGF if non-null, the registration code is emitted into this function
///        and nullptr is returned; if null, a standalone initialization
///        function is created and returned.
/// \returns the standalone initialization function, or nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Handle each definition only once, keyed by its mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check -
      // presumably PerformInit implies that an initializer exists; confirm.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the destination storage.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the signature 'void *(void *)'.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to the
      // variable's type, using the alignment of the original variable.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the signature 'void (void *)'.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a standalone
      // 'void ()' function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869 
/// Emit and register the constructor/destructor offload entries for the
/// declare target variable \p VD whose global is \p Addr.
///
/// When compiling for the device, real "_ctor"/"_dtor" functions operating on
/// \p Addr are generated; on the host, private constant i8 placeholder globals
/// with the same names are created instead. In both cases the entry is
/// registered with OffloadEntriesInfoManager.
///
/// \param PerformInit if true (and compiling C++), the constructor entry is
///        produced.
/// \returns false when there are no offload target triples and this is not a
///          device compilation; otherwise the value of LangOpts.OpenMPIsDevice.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Nothing to emit for variables that are not declare target, are 'link', or
  // are 'to' under unified shared memory.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each definition only once, keyed by its mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Note that the literal prefix ends in '_' and the device id format starts
    // with another '_', so the resulting name contains "__omp_offloading__".
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global Addr of the declare target variable VD
      // NOTE(review): Init is dereferenced below without a null check -
      // presumably PerformInit implies that an initializer exists; confirm.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 global carrying the constructor's
      // name serves as both the entry address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global Addr of
      // the declare target variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global carrying the destructor's
      // name serves as both the entry address and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056                                        llvm::Function *OutlinedFn,
2057                                        ArrayRef<llvm::Value *> CapturedVars,
2058                                        const Expr *IfCond) {
2059   if (!CGF.HaveInsertPoint())
2060     return;
2061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062   auto &M = CGM.getModule();
2063   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2065     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *Args[] = {
2068         RTLoc,
2069         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072     RealArgs.append(std::begin(Args), std::end(Args));
2073     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074 
2075     llvm::FunctionCallee RTLFn =
2076         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078   };
2079   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083     // Build calls:
2084     // __kmpc_serialized_parallel(&Loc, GTid);
2085     llvm::Value *Args[] = {RTLoc, ThreadID};
2086     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087                             M, OMPRTL___kmpc_serialized_parallel),
2088                         Args);
2089 
2090     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092     Address ZeroAddrBound =
2093         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094                                          /*Name=*/".bound.zero.addr");
2095     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097     // ThreadId for serialized parallels is 0.
2098     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2102 
2103     // __kmpc_end_serialized_parallel(&Loc, GTid);
2104     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2105     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2106                             M, OMPRTL___kmpc_end_serialized_parallel),
2107                         EndArgs);
2108   };
2109   if (IfCond) {
2110     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2111   } else {
2112     RegionCodeGenTy ThenRCG(ThenGen);
2113     ThenRCG(CGF);
2114   }
2115 }
2116 
2117 // If we're inside an (outlined) parallel region, use the region info's
2118 // thread-ID variable (it is passed in a first argument of the outlined function
2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2120 // regular serial code region, get thread ID by calling kmp_int32
2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2122 // return the address of that temp.
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2124                                              SourceLocation Loc) {
2125   if (auto *OMPRegionInfo =
2126           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2127     if (OMPRegionInfo->getThreadIDVariable())
2128       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2129 
2130   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2131   QualType Int32Ty =
2132       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2133   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2134   CGF.EmitStoreOfScalar(ThreadID,
2135                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2136 
2137   return ThreadIDTemp;
2138 }
2139 
2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2141     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2142   SmallString<256> Buffer;
2143   llvm::raw_svector_ostream Out(Buffer);
2144   Out << Name;
2145   StringRef RuntimeName = Out.str();
2146   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2147   if (Elem.second) {
2148     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2149            "OMP internal variable has different type than requested");
2150     return &*Elem.second;
2151   }
2152 
2153   return Elem.second = new llvm::GlobalVariable(
2154              CGM.getModule(), Ty, /*IsConstant*/ false,
2155              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2156              Elem.first(), /*InsertBefore=*/nullptr,
2157              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2158 }
2159 
2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2161   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2162   std::string Name = getName({Prefix, "var"});
2163   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2164 }
2165 
2166 namespace {
2167 /// Common pre(post)-action for different OpenMP constructs.
2168 class CommonActionTy final : public PrePostActionTy {
2169   llvm::FunctionCallee EnterCallee;
2170   ArrayRef<llvm::Value *> EnterArgs;
2171   llvm::FunctionCallee ExitCallee;
2172   ArrayRef<llvm::Value *> ExitArgs;
2173   bool Conditional;
2174   llvm::BasicBlock *ContBlock = nullptr;
2175 
2176 public:
2177   CommonActionTy(llvm::FunctionCallee EnterCallee,
2178                  ArrayRef<llvm::Value *> EnterArgs,
2179                  llvm::FunctionCallee ExitCallee,
2180                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2181       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2182         ExitArgs(ExitArgs), Conditional(Conditional) {}
2183   void Enter(CodeGenFunction &CGF) override {
2184     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2185     if (Conditional) {
2186       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2187       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2188       ContBlock = CGF.createBasicBlock("omp_if.end");
2189       // Generate the branch (If-stmt)
2190       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2191       CGF.EmitBlock(ThenBlock);
2192     }
2193   }
2194   void Done(CodeGenFunction &CGF) {
2195     // Emit the rest of blocks/branches
2196     CGF.EmitBranch(ContBlock);
2197     CGF.EmitBlock(ContBlock, true);
2198   }
2199   void Exit(CodeGenFunction &CGF) override {
2200     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2201   }
2202 };
2203 } // anonymous namespace
2204 
2205 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2206                                          StringRef CriticalName,
2207                                          const RegionCodeGenTy &CriticalOpGen,
2208                                          SourceLocation Loc, const Expr *Hint) {
2209   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2210   // CriticalOpGen();
2211   // __kmpc_end_critical(ident_t *, gtid, Lock);
2212   // Prepare arguments and build a call to __kmpc_critical
2213   if (!CGF.HaveInsertPoint())
2214     return;
2215   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2216                          getCriticalRegionLock(CriticalName)};
2217   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2218                                                 std::end(Args));
2219   if (Hint) {
2220     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2221         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2222   }
2223   CommonActionTy Action(
2224       OMPBuilder.getOrCreateRuntimeFunction(
2225           CGM.getModule(),
2226           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2227       EnterArgs,
2228       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2229                                             OMPRTL___kmpc_end_critical),
2230       Args);
2231   CriticalOpGen.setAction(Action);
2232   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2233 }
2234 
2235 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2236                                        const RegionCodeGenTy &MasterOpGen,
2237                                        SourceLocation Loc) {
2238   if (!CGF.HaveInsertPoint())
2239     return;
2240   // if(__kmpc_master(ident_t *, gtid)) {
2241   //   MasterOpGen();
2242   //   __kmpc_end_master(ident_t *, gtid);
2243   // }
2244   // Prepare arguments and build a call to __kmpc_master
2245   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2246   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247                             CGM.getModule(), OMPRTL___kmpc_master),
2248                         Args,
2249                         OMPBuilder.getOrCreateRuntimeFunction(
2250                             CGM.getModule(), OMPRTL___kmpc_end_master),
2251                         Args,
2252                         /*Conditional=*/true);
2253   MasterOpGen.setAction(Action);
2254   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2255   Action.Done(CGF);
2256 }
2257 
2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259                                         SourceLocation Loc) {
2260   if (!CGF.HaveInsertPoint())
2261     return;
2262   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263     OMPBuilder.CreateTaskyield(CGF.Builder);
2264   } else {
2265     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266     llvm::Value *Args[] = {
2267         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271                         Args);
2272   }
2273 
2274   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275     Region->emitUntiedSwitch(CGF);
2276 }
2277 
2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279                                           const RegionCodeGenTy &TaskgroupOpGen,
2280                                           SourceLocation Loc) {
2281   if (!CGF.HaveInsertPoint())
2282     return;
2283   // __kmpc_taskgroup(ident_t *, gtid);
2284   // TaskgroupOpGen();
2285   // __kmpc_end_taskgroup(ident_t *, gtid);
2286   // Prepare arguments and build a call to __kmpc_taskgroup
2287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290                         Args,
2291                         OMPBuilder.getOrCreateRuntimeFunction(
2292                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293                         Args);
2294   TaskgroupOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296 }
2297 
2298 /// Given an array of pointers to variables, project the address of a
2299 /// given variable.
2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301                                       unsigned Index, const VarDecl *Var) {
2302   // Pull out the pointer to the variable.
2303   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305 
2306   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2307   Addr = CGF.Builder.CreateElementBitCast(
2308       Addr, CGF.ConvertTypeForMem(Var->getType()));
2309   return Addr;
2310 }
2311 
2312 static llvm::Value *emitCopyprivateCopyFunction(
2313     CodeGenModule &CGM, llvm::Type *ArgsType,
2314     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2315     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2316     SourceLocation Loc) {
2317   ASTContext &C = CGM.getContext();
2318   // void copy_func(void *LHSArg, void *RHSArg);
2319   FunctionArgList Args;
2320   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2321                            ImplicitParamDecl::Other);
2322   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2323                            ImplicitParamDecl::Other);
2324   Args.push_back(&LHSArg);
2325   Args.push_back(&RHSArg);
2326   const auto &CGFI =
2327       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2328   std::string Name =
2329       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2330   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2331                                     llvm::GlobalValue::InternalLinkage, Name,
2332                                     &CGM.getModule());
2333   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2334   Fn->setDoesNotRecurse();
2335   CodeGenFunction CGF(CGM);
2336   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2337   // Dest = (void*[n])(LHSArg);
2338   // Src = (void*[n])(RHSArg);
2339   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2340       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2341       ArgsType), CGF.getPointerAlign());
2342   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2343       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2344       ArgsType), CGF.getPointerAlign());
2345   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2346   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2347   // ...
2348   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2349   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2350     const auto *DestVar =
2351         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2352     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2353 
2354     const auto *SrcVar =
2355         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2356     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2357 
2358     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2359     QualType Type = VD->getType();
2360     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2361   }
2362   CGF.FinishFunction();
2363   return Fn;
2364 }
2365 
2366 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2367                                        const RegionCodeGenTy &SingleOpGen,
2368                                        SourceLocation Loc,
2369                                        ArrayRef<const Expr *> CopyprivateVars,
2370                                        ArrayRef<const Expr *> SrcExprs,
2371                                        ArrayRef<const Expr *> DstExprs,
2372                                        ArrayRef<const Expr *> AssignmentOps) {
2373   if (!CGF.HaveInsertPoint())
2374     return;
2375   assert(CopyprivateVars.size() == SrcExprs.size() &&
2376          CopyprivateVars.size() == DstExprs.size() &&
2377          CopyprivateVars.size() == AssignmentOps.size());
2378   ASTContext &C = CGM.getContext();
2379   // int32 did_it = 0;
2380   // if(__kmpc_single(ident_t *, gtid)) {
2381   //   SingleOpGen();
2382   //   __kmpc_end_single(ident_t *, gtid);
2383   //   did_it = 1;
2384   // }
2385   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386   // <copy_func>, did_it);
2387 
2388   Address DidIt = Address::invalid();
2389   if (!CopyprivateVars.empty()) {
2390     // int32 did_it = 0;
2391     QualType KmpInt32Ty =
2392         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395   }
2396   // Prepare arguments and build a call to __kmpc_single
2397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399                             CGM.getModule(), OMPRTL___kmpc_single),
2400                         Args,
2401                         OMPBuilder.getOrCreateRuntimeFunction(
2402                             CGM.getModule(), OMPRTL___kmpc_end_single),
2403                         Args,
2404                         /*Conditional=*/true);
2405   SingleOpGen.setAction(Action);
2406   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407   if (DidIt.isValid()) {
2408     // did_it = 1;
2409     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410   }
2411   Action.Done(CGF);
2412   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413   // <copy_func>, did_it);
2414   if (DidIt.isValid()) {
2415     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416     QualType CopyprivateArrayTy = C.getConstantArrayType(
2417         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418         /*IndexTypeQuals=*/0);
2419     // Create a list of all private variables for copyprivate.
2420     Address CopyprivateList =
2421         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2423       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424       CGF.Builder.CreateStore(
2425           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2426               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427               CGF.VoidPtrTy),
2428           Elem);
2429     }
2430     // Build function that copies private values from single region to all other
2431     // threads in the corresponding parallel region.
2432     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2434         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2435     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2436     Address CL =
2437       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2438                                                       CGF.VoidPtrTy);
2439     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2440     llvm::Value *Args[] = {
2441         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2442         getThreadID(CGF, Loc),        // i32 <gtid>
2443         BufSize,                      // size_t <buf_size>
2444         CL.getPointer(),              // void *<copyprivate list>
2445         CpyFn,                        // void (*) (void *, void *) <copy_func>
2446         DidItVal                      // i32 did_it
2447     };
2448     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2449                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2450                         Args);
2451   }
2452 }
2453 
2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2455                                         const RegionCodeGenTy &OrderedOpGen,
2456                                         SourceLocation Loc, bool IsThreads) {
2457   if (!CGF.HaveInsertPoint())
2458     return;
2459   // __kmpc_ordered(ident_t *, gtid);
2460   // OrderedOpGen();
2461   // __kmpc_end_ordered(ident_t *, gtid);
2462   // Prepare arguments and build a call to __kmpc_ordered
2463   if (IsThreads) {
2464     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466                               CGM.getModule(), OMPRTL___kmpc_ordered),
2467                           Args,
2468                           OMPBuilder.getOrCreateRuntimeFunction(
2469                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2470                           Args);
2471     OrderedOpGen.setAction(Action);
2472     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2473     return;
2474   }
2475   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2476 }
2477 
2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2479   unsigned Flags;
2480   if (Kind == OMPD_for)
2481     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2482   else if (Kind == OMPD_sections)
2483     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2484   else if (Kind == OMPD_single)
2485     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2486   else if (Kind == OMPD_barrier)
2487     Flags = OMP_IDENT_BARRIER_EXPL;
2488   else
2489     Flags = OMP_IDENT_BARRIER_IMPL;
2490   return Flags;
2491 }
2492 
2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2494     CodeGenFunction &CGF, const OMPLoopDirective &S,
2495     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2496   // Check if the loop directive is actually a doacross loop directive. In this
2497   // case choose static, 1 schedule.
2498   if (llvm::any_of(
2499           S.getClausesOfKind<OMPOrderedClause>(),
2500           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2501     ScheduleKind = OMPC_SCHEDULE_static;
2502     // Chunk size is 1 in this case.
2503     llvm::APInt ChunkSize(32, 1);
2504     ChunkExpr = IntegerLiteral::Create(
2505         CGF.getContext(), ChunkSize,
2506         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2507         SourceLocation());
2508   }
2509 }
2510 
2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2512                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2513                                       bool ForceSimpleCall) {
2514   // Check if we should use the OMPBuilder
2515   auto *OMPRegionInfo =
2516       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2517   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2518     CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
2519         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2520     return;
2521   }
2522 
2523   if (!CGF.HaveInsertPoint())
2524     return;
2525   // Build call __kmpc_cancel_barrier(loc, thread_id);
2526   // Build call __kmpc_barrier(loc, thread_id);
2527   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2528   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2529   // thread_id);
2530   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2531                          getThreadID(CGF, Loc)};
2532   if (OMPRegionInfo) {
2533     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2534       llvm::Value *Result = CGF.EmitRuntimeCall(
2535           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2536                                                 OMPRTL___kmpc_cancel_barrier),
2537           Args);
2538       if (EmitChecks) {
2539         // if (__kmpc_cancel_barrier()) {
2540         //   exit from construct;
2541         // }
2542         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2543         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2544         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2545         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2546         CGF.EmitBlock(ExitBB);
2547         //   exit from construct;
2548         CodeGenFunction::JumpDest CancelDestination =
2549             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2550         CGF.EmitBranchThroughCleanup(CancelDestination);
2551         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2552       }
2553       return;
2554     }
2555   }
2556   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2557                           CGM.getModule(), OMPRTL___kmpc_barrier),
2558                       Args);
2559 }
2560 
2561 /// Map the OpenMP loop schedule to the runtime enumeration.
2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2563                                           bool Chunked, bool Ordered) {
2564   switch (ScheduleKind) {
2565   case OMPC_SCHEDULE_static:
2566     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2567                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2568   case OMPC_SCHEDULE_dynamic:
2569     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2570   case OMPC_SCHEDULE_guided:
2571     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2572   case OMPC_SCHEDULE_runtime:
2573     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2574   case OMPC_SCHEDULE_auto:
2575     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2576   case OMPC_SCHEDULE_unknown:
2577     assert(!Chunked && "chunk was specified but schedule kind not known");
2578     return Ordered ? OMP_ord_static : OMP_sch_static;
2579   }
2580   llvm_unreachable("Unexpected runtime schedule");
2581 }
2582 
2583 /// Map the OpenMP distribute schedule to the runtime enumeration.
2584 static OpenMPSchedType
2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2586   // only static is allowed for dist_schedule
2587   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2588 }
2589 
2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2591                                          bool Chunked) const {
2592   OpenMPSchedType Schedule =
2593       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594   return Schedule == OMP_sch_static;
2595 }
2596 
2597 bool CGOpenMPRuntime::isStaticNonchunked(
2598     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600   return Schedule == OMP_dist_sch_static;
2601 }
2602 
2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2604                                       bool Chunked) const {
2605   OpenMPSchedType Schedule =
2606       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2607   return Schedule == OMP_sch_static_chunked;
2608 }
2609 
2610 bool CGOpenMPRuntime::isStaticChunked(
2611     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2612   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2613   return Schedule == OMP_dist_sch_static_chunked;
2614 }
2615 
2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2619   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2620   return Schedule != OMP_sch_static;
2621 }
2622 
2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2624                                   OpenMPScheduleClauseModifier M1,
2625                                   OpenMPScheduleClauseModifier M2) {
2626   int Modifier = 0;
2627   switch (M1) {
2628   case OMPC_SCHEDULE_MODIFIER_monotonic:
2629     Modifier = OMP_sch_modifier_monotonic;
2630     break;
2631   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2632     Modifier = OMP_sch_modifier_nonmonotonic;
2633     break;
2634   case OMPC_SCHEDULE_MODIFIER_simd:
2635     if (Schedule == OMP_sch_static_chunked)
2636       Schedule = OMP_sch_static_balanced_chunked;
2637     break;
2638   case OMPC_SCHEDULE_MODIFIER_last:
2639   case OMPC_SCHEDULE_MODIFIER_unknown:
2640     break;
2641   }
2642   switch (M2) {
2643   case OMPC_SCHEDULE_MODIFIER_monotonic:
2644     Modifier = OMP_sch_modifier_monotonic;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2647     Modifier = OMP_sch_modifier_nonmonotonic;
2648     break;
2649   case OMPC_SCHEDULE_MODIFIER_simd:
2650     if (Schedule == OMP_sch_static_chunked)
2651       Schedule = OMP_sch_static_balanced_chunked;
2652     break;
2653   case OMPC_SCHEDULE_MODIFIER_last:
2654   case OMPC_SCHEDULE_MODIFIER_unknown:
2655     break;
2656   }
2657   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2658   // If the static schedule kind is specified or if the ordered clause is
2659   // specified, and if the nonmonotonic modifier is not specified, the effect is
2660   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2661   // modifier is specified, the effect is as if the nonmonotonic modifier is
2662   // specified.
2663   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2664     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2665           Schedule == OMP_sch_static_balanced_chunked ||
2666           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2667           Schedule == OMP_dist_sch_static_chunked ||
2668           Schedule == OMP_dist_sch_static))
2669       Modifier = OMP_sch_modifier_nonmonotonic;
2670   }
2671   return Schedule | Modifier;
2672 }
2673 
2674 void CGOpenMPRuntime::emitForDispatchInit(
2675     CodeGenFunction &CGF, SourceLocation Loc,
2676     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2677     bool Ordered, const DispatchRTInput &DispatchValues) {
2678   if (!CGF.HaveInsertPoint())
2679     return;
2680   OpenMPSchedType Schedule = getRuntimeSchedule(
2681       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2682   assert(Ordered ||
2683          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2684           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2685           Schedule != OMP_sch_static_balanced_chunked));
2686   // Call __kmpc_dispatch_init(
2687   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2688   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2689   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2690 
2691   // If the Chunk was not specified in the clause - use default value 1.
2692   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2693                                             : CGF.Builder.getIntN(IVSize, 1);
2694   llvm::Value *Args[] = {
2695       emitUpdateLocation(CGF, Loc),
2696       getThreadID(CGF, Loc),
2697       CGF.Builder.getInt32(addMonoNonMonoModifier(
2698           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2699       DispatchValues.LB,                                     // Lower
2700       DispatchValues.UB,                                     // Upper
2701       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2702       Chunk                                                  // Chunk
2703   };
2704   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2705 }
2706 
2707 static void emitForStaticInitCall(
2708     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2709     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2711     const CGOpenMPRuntime::StaticRTInput &Values) {
2712   if (!CGF.HaveInsertPoint())
2713     return;
2714 
2715   assert(!Values.Ordered);
2716   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2717          Schedule == OMP_sch_static_balanced_chunked ||
2718          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2719          Schedule == OMP_dist_sch_static ||
2720          Schedule == OMP_dist_sch_static_chunked);
2721 
2722   // Call __kmpc_for_static_init(
2723   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2724   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2725   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2726   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2727   llvm::Value *Chunk = Values.Chunk;
2728   if (Chunk == nullptr) {
2729     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2730             Schedule == OMP_dist_sch_static) &&
2731            "expected static non-chunked schedule");
2732     // If the Chunk was not specified in the clause - use default value 1.
2733     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2734   } else {
2735     assert((Schedule == OMP_sch_static_chunked ||
2736             Schedule == OMP_sch_static_balanced_chunked ||
2737             Schedule == OMP_ord_static_chunked ||
2738             Schedule == OMP_dist_sch_static_chunked) &&
2739            "expected static chunked schedule");
2740   }
2741   llvm::Value *Args[] = {
2742       UpdateLocation,
2743       ThreadId,
2744       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2745                                                   M2)), // Schedule type
2746       Values.IL.getPointer(),                           // &isLastIter
2747       Values.LB.getPointer(),                           // &LB
2748       Values.UB.getPointer(),                           // &UB
2749       Values.ST.getPointer(),                           // &Stride
2750       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2751       Chunk                                             // Chunk
2752   };
2753   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2754 }
2755 
2756 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2757                                         SourceLocation Loc,
2758                                         OpenMPDirectiveKind DKind,
2759                                         const OpenMPScheduleTy &ScheduleKind,
2760                                         const StaticRTInput &Values) {
2761   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2762       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2763   assert(isOpenMPWorksharingDirective(DKind) &&
2764          "Expected loop-based or sections-based directive.");
2765   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2766                                              isOpenMPLoopDirective(DKind)
2767                                                  ? OMP_IDENT_WORK_LOOP
2768                                                  : OMP_IDENT_WORK_SECTIONS);
2769   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2770   llvm::FunctionCallee StaticInitFunction =
2771       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2772   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2773   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2774                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2775 }
2776 
2777 void CGOpenMPRuntime::emitDistributeStaticInit(
2778     CodeGenFunction &CGF, SourceLocation Loc,
2779     OpenMPDistScheduleClauseKind SchedKind,
2780     const CGOpenMPRuntime::StaticRTInput &Values) {
2781   OpenMPSchedType ScheduleNum =
2782       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2783   llvm::Value *UpdatedLocation =
2784       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2785   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2786   llvm::FunctionCallee StaticInitFunction =
2787       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2788   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2789                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2790                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2791 }
2792 
2793 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2794                                           SourceLocation Loc,
2795                                           OpenMPDirectiveKind DKind) {
2796   if (!CGF.HaveInsertPoint())
2797     return;
2798   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2799   llvm::Value *Args[] = {
2800       emitUpdateLocation(CGF, Loc,
2801                          isOpenMPDistributeDirective(DKind)
2802                              ? OMP_IDENT_WORK_DISTRIBUTE
2803                              : isOpenMPLoopDirective(DKind)
2804                                    ? OMP_IDENT_WORK_LOOP
2805                                    : OMP_IDENT_WORK_SECTIONS),
2806       getThreadID(CGF, Loc)};
2807   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2808   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2809                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2810                       Args);
2811 }
2812 
2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2814                                                  SourceLocation Loc,
2815                                                  unsigned IVSize,
2816                                                  bool IVSigned) {
2817   if (!CGF.HaveInsertPoint())
2818     return;
2819   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2820   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2821   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2822 }
2823 
2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2825                                           SourceLocation Loc, unsigned IVSize,
2826                                           bool IVSigned, Address IL,
2827                                           Address LB, Address UB,
2828                                           Address ST) {
2829   // Call __kmpc_dispatch_next(
2830   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2831   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2832   //          kmp_int[32|64] *p_stride);
2833   llvm::Value *Args[] = {
2834       emitUpdateLocation(CGF, Loc),
2835       getThreadID(CGF, Loc),
2836       IL.getPointer(), // &isLastIter
2837       LB.getPointer(), // &Lower
2838       UB.getPointer(), // &Upper
2839       ST.getPointer()  // &Stride
2840   };
2841   llvm::Value *Call =
2842       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2843   return CGF.EmitScalarConversion(
2844       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2845       CGF.getContext().BoolTy, Loc);
2846 }
2847 
2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2849                                            llvm::Value *NumThreads,
2850                                            SourceLocation Loc) {
2851   if (!CGF.HaveInsertPoint())
2852     return;
2853   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2854   llvm::Value *Args[] = {
2855       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2857   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2859                       Args);
2860 }
2861 
2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2863                                          ProcBindKind ProcBind,
2864                                          SourceLocation Loc) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2868   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2869   llvm::Value *Args[] = {
2870       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2871       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2872   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2873                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2874                       Args);
2875 }
2876 
2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2878                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2879   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2880     OMPBuilder.CreateFlush(CGF.Builder);
2881   } else {
2882     if (!CGF.HaveInsertPoint())
2883       return;
2884     // Build call void __kmpc_flush(ident_t *loc)
2885     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886                             CGM.getModule(), OMPRTL___kmpc_flush),
2887                         emitUpdateLocation(CGF, Loc));
2888   }
2889 }
2890 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// No enumerator has an explicit value, so each one's value is its position
/// in this list, starting at 0. Reordering or inserting enumerators therefore
/// changes every index that follows.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2916 
2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2918   return OffloadEntriesTargetRegion.empty() &&
2919          OffloadEntriesDeviceGlobalVar.empty();
2920 }
2921 
2922 /// Initialize target region entry.
2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2924     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2925                                     StringRef ParentName, unsigned LineNum,
2926                                     unsigned Order) {
2927   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2928                                              "only required for the device "
2929                                              "code generation.");
2930   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2931       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2932                                    OMPTargetRegionEntryTargetRegion);
2933   ++OffloadingEntriesNum;
2934 }
2935 
2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2937     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2938                                   StringRef ParentName, unsigned LineNum,
2939                                   llvm::Constant *Addr, llvm::Constant *ID,
2940                                   OMPTargetRegionEntryKind Flags) {
2941   // If we are emitting code for a target, the entry is already initialized,
2942   // only has to be registered.
2943   if (CGM.getLangOpts().OpenMPIsDevice) {
2944     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2945       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2946           DiagnosticsEngine::Error,
2947           "Unable to find target region on line '%0' in the device code.");
2948       CGM.getDiags().Report(DiagID) << LineNum;
2949       return;
2950     }
2951     auto &Entry =
2952         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2953     assert(Entry.isValid() && "Entry not initialized!");
2954     Entry.setAddress(Addr);
2955     Entry.setID(ID);
2956     Entry.setFlags(Flags);
2957   } else {
2958     if (Flags ==
2959             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2960         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2961                                  /*IgnoreAddressId*/ true))
2962       return;
2963     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2964            "Target region entry already registered!");
2965     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2966     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2967     ++OffloadingEntriesNum;
2968   }
2969 }
2970 
2971 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2972     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2973     bool IgnoreAddressId) const {
2974   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2975   if (PerDevice == OffloadEntriesTargetRegion.end())
2976     return false;
2977   auto PerFile = PerDevice->second.find(FileID);
2978   if (PerFile == PerDevice->second.end())
2979     return false;
2980   auto PerParentName = PerFile->second.find(ParentName);
2981   if (PerParentName == PerFile->second.end())
2982     return false;
2983   auto PerLine = PerParentName->second.find(LineNum);
2984   if (PerLine == PerParentName->second.end())
2985     return false;
2986   // Fail if this entry is already registered.
2987   if (!IgnoreAddressId &&
2988       (PerLine->second.getAddress() || PerLine->second.getID()))
2989     return false;
2990   return true;
2991 }
2992 
2993 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2994     const OffloadTargetRegionEntryInfoActTy &Action) {
2995   // Scan all target region entries and perform the provided action.
2996   for (const auto &D : OffloadEntriesTargetRegion)
2997     for (const auto &F : D.second)
2998       for (const auto &P : F.second)
2999         for (const auto &L : P.second)
3000           Action(D.first, F.first, P.first(), L.first, L.second);
3001 }
3002 
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3004     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3005                                        OMPTargetGlobalVarEntryKind Flags,
3006                                        unsigned Order) {
3007   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3008                                              "only required for the device "
3009                                              "code generation.");
3010   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3011   ++OffloadingEntriesNum;
3012 }
3013 
3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3015     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3016                                      CharUnits VarSize,
3017                                      OMPTargetGlobalVarEntryKind Flags,
3018                                      llvm::GlobalValue::LinkageTypes Linkage) {
3019   if (CGM.getLangOpts().OpenMPIsDevice) {
3020     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3021     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3022            "Entry not initialized!");
3023     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3024            "Resetting with the new address.");
3025     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3026       if (Entry.getVarSize().isZero()) {
3027         Entry.setVarSize(VarSize);
3028         Entry.setLinkage(Linkage);
3029       }
3030       return;
3031     }
3032     Entry.setVarSize(VarSize);
3033     Entry.setLinkage(Linkage);
3034     Entry.setAddress(Addr);
3035   } else {
3036     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3037       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3038       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3039              "Entry not initialized!");
3040       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3041              "Resetting with the new address.");
3042       if (Entry.getVarSize().isZero()) {
3043         Entry.setVarSize(VarSize);
3044         Entry.setLinkage(Linkage);
3045       }
3046       return;
3047     }
3048     OffloadEntriesDeviceGlobalVar.try_emplace(
3049         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3050     ++OffloadingEntriesNum;
3051   }
3052 }
3053 
3054 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3055     actOnDeviceGlobalVarEntriesInfo(
3056         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3057   // Scan all target region entries and perform the provided action.
3058   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3059     Action(E.getKey(), E.getValue());
3060 }
3061 
3062 void CGOpenMPRuntime::createOffloadEntry(
3063     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3064     llvm::GlobalValue::LinkageTypes Linkage) {
3065   StringRef Name = Addr->getName();
3066   llvm::Module &M = CGM.getModule();
3067   llvm::LLVMContext &C = M.getContext();
3068 
3069   // Create constant string with the name.
3070   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3071 
3072   std::string StringName = getName({"omp_offloading", "entry_name"});
3073   auto *Str = new llvm::GlobalVariable(
3074       M, StrPtrInit->getType(), /*isConstant=*/true,
3075       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3076   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3077 
3078   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3079                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3080                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3081                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3082                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3083   std::string EntryName = getName({"omp_offloading", "entry", ""});
3084   llvm::GlobalVariable *Entry = createGlobalStruct(
3085       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3086       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3087 
3088   // The entry has to be created in the section the linker expects it to be.
3089   Entry->setSection("omp_offloading_entries");
3090 }
3091 
/// Emits the "omp_offload.info" named metadata and the offload entries for
/// every target region and declare target global variable recorded in
/// OffloadEntriesInfoManager.
///
/// Does nothing in simd-only mode or when no entries were recorded. Entries
/// are first collected into an array indexed by their creation order, then
/// walked in that order to emit (or diagnose) each offload entry.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // One metadata node is added per target region entry and per device global
  // variable entry.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by an entry's order number. Each OrderedEntries
  // slot holds the entry, a source location (left default-constructed for
  // global variables) and a name (parent function or mangled variable name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics: scan the source
        // manager's files for the one whose unique ID matches (DeviceID,
        // FileID) and translate (Line, column 1) in it. Loc stays
        // default-constructed if no file matches.
        // NOTE: within this for statement the end iterator named `E` shadows
        // the lambda parameter `E`; after the loop `E` is the entry again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit an offload entry for each
  // one that is complete; incomplete ones are diagnosed or skipped.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with a size of 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // On the device with unified shared memory required, no entry is
        // emitted for 'to' variables.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The assertion expects the address to be unset on the device and set
        // on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // No entry is emitted for 'link' variables on the device.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address serves as both the ID and the
      // address of the entry.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3265 
3266 /// Loads all the offload entries information from the host IR
3267 /// metadata.
3268 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3269   // If we are in target mode, load the metadata from the host IR. This code has
3270   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3271 
3272   if (!CGM.getLangOpts().OpenMPIsDevice)
3273     return;
3274 
3275   if (CGM.getLangOpts().OMPHostIRFile.empty())
3276     return;
3277 
3278   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3279   if (auto EC = Buf.getError()) {
3280     CGM.getDiags().Report(diag::err_cannot_open_file)
3281         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3282     return;
3283   }
3284 
3285   llvm::LLVMContext C;
3286   auto ME = expectedToErrorOrAndEmitErrors(
3287       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3288 
3289   if (auto EC = ME.getError()) {
3290     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3291         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3292     CGM.getDiags().Report(DiagID)
3293         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3294     return;
3295   }
3296 
3297   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3298   if (!MD)
3299     return;
3300 
3301   for (llvm::MDNode *MN : MD->operands()) {
3302     auto &&GetMDInt = [MN](unsigned Idx) {
3303       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3304       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3305     };
3306 
3307     auto &&GetMDString = [MN](unsigned Idx) {
3308       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3309       return V->getString();
3310     };
3311 
3312     switch (GetMDInt(0)) {
3313     default:
3314       llvm_unreachable("Unexpected metadata!");
3315       break;
3316     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3317         OffloadingEntryInfoTargetRegion:
3318       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3319           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3320           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3321           /*Order=*/GetMDInt(5));
3322       break;
3323     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3324         OffloadingEntryInfoDeviceGlobalVar:
3325       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3326           /*MangledName=*/GetMDString(1),
3327           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3328               /*Flags=*/GetMDInt(2)),
3329           /*Order=*/GetMDInt(3));
3330       break;
3331     }
3332   }
3333 }
3334 
3335 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3336   if (!KmpRoutineEntryPtrTy) {
3337     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3338     ASTContext &C = CGM.getContext();
3339     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3340     FunctionProtoType::ExtProtoInfo EPI;
3341     KmpRoutineEntryPtrQTy = C.getPointerType(
3342         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3343     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3344   }
3345 }
3346 
3347 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3348   // Make sure the type of the entry is already created. This is the type we
3349   // have to create:
3350   // struct __tgt_offload_entry{
3351   //   void      *addr;       // Pointer to the offload entry info.
3352   //                          // (function or global)
3353   //   char      *name;       // Name of the function or global.
3354   //   size_t     size;       // Size of the entry info (0 if it a function).
3355   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3356   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3357   // };
3358   if (TgtOffloadEntryQTy.isNull()) {
3359     ASTContext &C = CGM.getContext();
3360     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3361     RD->startDefinition();
3362     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3363     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3364     addFieldToRecordDecl(C, RD, C.getSizeType());
3365     addFieldToRecordDecl(
3366         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3367     addFieldToRecordDecl(
3368         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3369     RD->completeDefinition();
3370     RD->addAttr(PackedAttr::CreateImplicit(C));
3371     TgtOffloadEntryQTy = C.getRecordType(RD);
3372   }
3373   return TgtOffloadEntryQTy;
3374 }
3375 
3376 namespace {
3377 struct PrivateHelpersTy {
3378   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3379                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3380       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3381         PrivateElemInit(PrivateElemInit) {}
3382   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3383   const Expr *OriginalRef = nullptr;
3384   const VarDecl *Original = nullptr;
3385   const VarDecl *PrivateCopy = nullptr;
3386   const VarDecl *PrivateElemInit = nullptr;
3387   bool isLocalPrivate() const {
3388     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3389   }
3390 };
3391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3392 } // anonymous namespace
3393 
3394 static bool isAllocatableDecl(const VarDecl *VD) {
3395   const VarDecl *CVD = VD->getCanonicalDecl();
3396   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3397     return false;
3398   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3399   // Use the default allocation.
3400   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3401             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3402            !AA->getAllocator());
3403 }
3404 
3405 static RecordDecl *
3406 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3407   if (!Privates.empty()) {
3408     ASTContext &C = CGM.getContext();
3409     // Build struct .kmp_privates_t. {
3410     //         /*  private vars  */
3411     //       };
3412     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3413     RD->startDefinition();
3414     for (const auto &Pair : Privates) {
3415       const VarDecl *VD = Pair.second.Original;
3416       QualType Type = VD->getType().getNonReferenceType();
3417       // If the private variable is a local variable with lvalue ref type,
3418       // allocate the pointer instead of the pointee type.
3419       if (Pair.second.isLocalPrivate()) {
3420         if (VD->getType()->isLValueReferenceType())
3421           Type = C.getPointerType(Type);
3422         if (isAllocatableDecl(VD))
3423           Type = C.getPointerType(Type);
3424       }
3425       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3426       if (VD->hasAttrs()) {
3427         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3428              E(VD->getAttrs().end());
3429              I != E; ++I)
3430           FD->addAttr(*I);
3431       }
3432     }
3433     RD->completeDefinition();
3434     return RD;
3435   }
3436   return nullptr;
3437 }
3438 
3439 static RecordDecl *
3440 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3441                          QualType KmpInt32Ty,
3442                          QualType KmpRoutineEntryPointerQTy) {
3443   ASTContext &C = CGM.getContext();
3444   // Build struct kmp_task_t {
3445   //         void *              shareds;
3446   //         kmp_routine_entry_t routine;
3447   //         kmp_int32           part_id;
3448   //         kmp_cmplrdata_t data1;
3449   //         kmp_cmplrdata_t data2;
3450   // For taskloops additional fields:
3451   //         kmp_uint64          lb;
3452   //         kmp_uint64          ub;
3453   //         kmp_int64           st;
3454   //         kmp_int32           liter;
3455   //         void *              reductions;
3456   //       };
3457   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3458   UD->startDefinition();
3459   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3460   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3461   UD->completeDefinition();
3462   QualType KmpCmplrdataTy = C.getRecordType(UD);
3463   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3464   RD->startDefinition();
3465   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3466   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3467   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3468   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3469   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3470   if (isOpenMPTaskLoopDirective(Kind)) {
3471     QualType KmpUInt64Ty =
3472         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3473     QualType KmpInt64Ty =
3474         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3475     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3476     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3477     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3478     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3479     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3480   }
3481   RD->completeDefinition();
3482   return RD;
3483 }
3484 
3485 static RecordDecl *
3486 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3487                                      ArrayRef<PrivateDataTy> Privates) {
3488   ASTContext &C = CGM.getContext();
3489   // Build struct kmp_task_t_with_privates {
3490   //         kmp_task_t task_data;
3491   //         .kmp_privates_t. privates;
3492   //       };
3493   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3494   RD->startDefinition();
3495   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3496   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3497     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3498   RD->completeDefinition();
3499   return RD;
3500 }
3501 
3502 /// Emit a proxy function which accepts kmp_task_t as the second
3503 /// argument.
3504 /// \code
3505 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3506 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3507 ///   For taskloops:
3508 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3509 ///   tt->reductions, tt->shareds);
3510 ///   return 0;
3511 /// }
3512 /// \endcode
3513 static llvm::Function *
3514 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3515                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3516                       QualType KmpTaskTWithPrivatesPtrQTy,
3517                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3518                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3519                       llvm::Value *TaskPrivatesMap) {
3520   ASTContext &C = CGM.getContext();
3521   FunctionArgList Args;
3522   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3523                             ImplicitParamDecl::Other);
3524   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3525                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3526                                 ImplicitParamDecl::Other);
3527   Args.push_back(&GtidArg);
3528   Args.push_back(&TaskTypeArg);
3529   const auto &TaskEntryFnInfo =
3530       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3531   llvm::FunctionType *TaskEntryTy =
3532       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3533   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3534   auto *TaskEntry = llvm::Function::Create(
3535       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3536   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3537   TaskEntry->setDoesNotRecurse();
3538   CodeGenFunction CGF(CGM);
3539   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3540                     Loc, Loc);
3541 
3542   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3543   // tt,
3544   // For taskloops:
3545   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3546   // tt->task_data.shareds);
3547   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3548       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3549   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3550       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3551       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3552   const auto *KmpTaskTWithPrivatesQTyRD =
3553       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3554   LValue Base =
3555       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3556   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3557   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3558   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3559   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3560 
3561   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3562   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3563   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3564       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3565       CGF.ConvertTypeForMem(SharedsPtrTy));
3566 
3567   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3568   llvm::Value *PrivatesParam;
3569   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3570     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3571     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3572         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3573   } else {
3574     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3575   }
3576 
3577   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3578                                TaskPrivatesMap,
3579                                CGF.Builder
3580                                    .CreatePointerBitCastOrAddrSpaceCast(
3581                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3582                                    .getPointer()};
3583   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3584                                           std::end(CommonArgs));
3585   if (isOpenMPTaskLoopDirective(Kind)) {
3586     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3587     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3588     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3589     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3590     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3591     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3592     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3593     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3594     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3595     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3596     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3597     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3598     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3599     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3600     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3601     CallArgs.push_back(LBParam);
3602     CallArgs.push_back(UBParam);
3603     CallArgs.push_back(StParam);
3604     CallArgs.push_back(LIParam);
3605     CallArgs.push_back(RParam);
3606   }
3607   CallArgs.push_back(SharedsParam);
3608 
3609   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3610                                                   CallArgs);
3611   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3612                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3613   CGF.FinishFunction();
3614   return TaskEntry;
3615 }
3616 
3617 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3618                                             SourceLocation Loc,
3619                                             QualType KmpInt32Ty,
3620                                             QualType KmpTaskTWithPrivatesPtrQTy,
3621                                             QualType KmpTaskTWithPrivatesQTy) {
3622   ASTContext &C = CGM.getContext();
3623   FunctionArgList Args;
3624   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3625                             ImplicitParamDecl::Other);
3626   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3627                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3628                                 ImplicitParamDecl::Other);
3629   Args.push_back(&GtidArg);
3630   Args.push_back(&TaskTypeArg);
3631   const auto &DestructorFnInfo =
3632       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3633   llvm::FunctionType *DestructorFnTy =
3634       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3635   std::string Name =
3636       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3637   auto *DestructorFn =
3638       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3639                              Name, &CGM.getModule());
3640   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3641                                     DestructorFnInfo);
3642   DestructorFn->setDoesNotRecurse();
3643   CodeGenFunction CGF(CGM);
3644   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3645                     Args, Loc, Loc);
3646 
3647   LValue Base = CGF.EmitLoadOfPointerLValue(
3648       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3649       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3650   const auto *KmpTaskTWithPrivatesQTyRD =
3651       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3652   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3653   Base = CGF.EmitLValueForField(Base, *FI);
3654   for (const auto *Field :
3655        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3656     if (QualType::DestructionKind DtorKind =
3657             Field->getType().isDestructedType()) {
3658       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3659       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3660     }
3661   }
3662   CGF.FinishFunction();
3663   return DestructorFn;
3664 }
3665 
3666 /// Emit a privates mapping function for correct handling of private and
3667 /// firstprivate variables.
3668 /// \code
3669 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3670 /// **noalias priv1,...,  <tyn> **noalias privn) {
3671 ///   *priv1 = &.privates.priv1;
3672 ///   ...;
3673 ///   *privn = &.privates.privn;
3674 /// }
3675 /// \endcode
3676 static llvm::Value *
3677 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3678                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3679                                ArrayRef<PrivateDataTy> Privates) {
3680   ASTContext &C = CGM.getContext();
3681   FunctionArgList Args;
3682   ImplicitParamDecl TaskPrivatesArg(
3683       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3684       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3685       ImplicitParamDecl::Other);
3686   Args.push_back(&TaskPrivatesArg);
3687   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3688   unsigned Counter = 1;
3689   for (const Expr *E : Data.PrivateVars) {
3690     Args.push_back(ImplicitParamDecl::Create(
3691         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3692         C.getPointerType(C.getPointerType(E->getType()))
3693             .withConst()
3694             .withRestrict(),
3695         ImplicitParamDecl::Other));
3696     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3697     PrivateVarsPos[VD] = Counter;
3698     ++Counter;
3699   }
3700   for (const Expr *E : Data.FirstprivateVars) {
3701     Args.push_back(ImplicitParamDecl::Create(
3702         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3703         C.getPointerType(C.getPointerType(E->getType()))
3704             .withConst()
3705             .withRestrict(),
3706         ImplicitParamDecl::Other));
3707     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3708     PrivateVarsPos[VD] = Counter;
3709     ++Counter;
3710   }
3711   for (const Expr *E : Data.LastprivateVars) {
3712     Args.push_back(ImplicitParamDecl::Create(
3713         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3714         C.getPointerType(C.getPointerType(E->getType()))
3715             .withConst()
3716             .withRestrict(),
3717         ImplicitParamDecl::Other));
3718     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3719     PrivateVarsPos[VD] = Counter;
3720     ++Counter;
3721   }
3722   for (const VarDecl *VD : Data.PrivateLocals) {
3723     QualType Ty = VD->getType().getNonReferenceType();
3724     if (VD->getType()->isLValueReferenceType())
3725       Ty = C.getPointerType(Ty);
3726     if (isAllocatableDecl(VD))
3727       Ty = C.getPointerType(Ty);
3728     Args.push_back(ImplicitParamDecl::Create(
3729         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3731         ImplicitParamDecl::Other));
3732     PrivateVarsPos[VD] = Counter;
3733     ++Counter;
3734   }
3735   const auto &TaskPrivatesMapFnInfo =
3736       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3737   llvm::FunctionType *TaskPrivatesMapTy =
3738       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3739   std::string Name =
3740       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3741   auto *TaskPrivatesMap = llvm::Function::Create(
3742       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3743       &CGM.getModule());
3744   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3745                                     TaskPrivatesMapFnInfo);
3746   if (CGM.getLangOpts().Optimize) {
3747     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3748     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3749     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3750   }
3751   CodeGenFunction CGF(CGM);
3752   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3753                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3754 
3755   // *privi = &.privates.privi;
3756   LValue Base = CGF.EmitLoadOfPointerLValue(
3757       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3758       TaskPrivatesArg.getType()->castAs<PointerType>());
3759   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3760   Counter = 0;
3761   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3762     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3763     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3764     LValue RefLVal =
3765         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3766     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3767         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3768     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3769     ++Counter;
3770   }
3771   CGF.FinishFunction();
3772   return TaskPrivatesMap;
3773 }
3774 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second member of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the matching field reached through \p TDBase.
/// Entries for which isLocalPrivate() holds are skipped.
///
/// \param CGF Function in which the initialization code is emitted.
/// \param D Directive whose 'task' or 'taskloop' captured statement is used to
/// look up the captured fields of the original variables.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used when a
/// source base is formed (see the condition in the body).
/// \param TDBase LValue of the kmp_task_t_with_privates object to initialize.
/// \param KmpTaskTWithPrivatesQTyRD Record of that object's type.
/// \param SharedsTy Type used for the lvalue formed from the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only the firstprivate list is consulted here.
/// \param Privates Descriptions of the privates, one per record field.
/// \param ForDup When true, only initializers that are non-trivial
/// CXXConstructExprs are emitted, and firstprivate sources are read from the
/// shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates member of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Select which captured statement of the directive to use.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base of the shareds block; stays default-constructed unless the condition
  // below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself and
  // is advanced once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside of the dup function every initializer is emitted; for the dup
    // function only non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer refers to the
      // original variable through that helper declaration, so the original's
      // lvalue has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original from the shareds block and rebuild the lvalue
          // with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // The original is a lambda capture field, or the current code
          // declaration is a block: emit the reference expression as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          // The RAII object is only alive while the reference is emitted.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the original's address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No helper variable: the initializer is emitted directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3896 
3897 /// Check if duplication function is required for taskloops.
3898 static bool checkInitIsRequired(CodeGenFunction &CGF,
3899                                 ArrayRef<PrivateDataTy> Privates) {
3900   bool InitRequired = false;
3901   for (const PrivateDataTy &Pair : Privates) {
3902     if (Pair.second.isLocalPrivate())
3903       continue;
3904     const VarDecl *VD = Pair.second.PrivateCopy;
3905     const Expr *Init = VD->getAnyInitializer();
3906     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3907                                     !CGF.isTrivialInitializer(Init));
3908     if (InitRequired)
3909       break;
3910   }
3911   return InitRequired;
3912 }
3913 
3914 
3915 /// Emit task_dup function (for initialization of
3916 /// private/firstprivate/lastprivate vars and last_iter flag)
3917 /// \code
3918 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3919 /// lastpriv) {
3920 /// // setup lastprivate flag
3921 ///    task_dst->last = lastpriv;
3922 /// // could be constructor calls here...
3923 /// }
3924 /// \endcode
3925 static llvm::Value *
3926 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3927                     const OMPExecutableDirective &D,
3928                     QualType KmpTaskTWithPrivatesPtrQTy,
3929                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3930                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3931                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3932                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3933   ASTContext &C = CGM.getContext();
3934   FunctionArgList Args;
3935   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3936                            KmpTaskTWithPrivatesPtrQTy,
3937                            ImplicitParamDecl::Other);
3938   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3939                            KmpTaskTWithPrivatesPtrQTy,
3940                            ImplicitParamDecl::Other);
3941   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3942                                 ImplicitParamDecl::Other);
3943   Args.push_back(&DstArg);
3944   Args.push_back(&SrcArg);
3945   Args.push_back(&LastprivArg);
3946   const auto &TaskDupFnInfo =
3947       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3948   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3949   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3950   auto *TaskDup = llvm::Function::Create(
3951       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3952   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3953   TaskDup->setDoesNotRecurse();
3954   CodeGenFunction CGF(CGM);
3955   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3956                     Loc);
3957 
3958   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3959       CGF.GetAddrOfLocalVar(&DstArg),
3960       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3961   // task_dst->liter = lastpriv;
3962   if (WithLastIter) {
3963     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3964     LValue Base = CGF.EmitLValueForField(
3965         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3966     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3967     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3968         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3969     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3970   }
3971 
3972   // Emit initial values for private copies (if any).
3973   assert(!Privates.empty());
3974   Address KmpTaskSharedsPtr = Address::invalid();
3975   if (!Data.FirstprivateVars.empty()) {
3976     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3977         CGF.GetAddrOfLocalVar(&SrcArg),
3978         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3979     LValue Base = CGF.EmitLValueForField(
3980         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3981     KmpTaskSharedsPtr = Address(
3982         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3983                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3984                                                   KmpTaskTShareds)),
3985                              Loc),
3986         CGM.getNaturalTypeAlignment(SharedsTy));
3987   }
3988   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3989                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3990   CGF.FinishFunction();
3991   return TaskDup;
3992 }
3993 
3994 /// Checks if destructor function is required to be generated.
3995 /// \return true if cleanups are required, false otherwise.
3996 static bool
3997 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3998                          ArrayRef<PrivateDataTy> Privates) {
3999   for (const PrivateDataTy &P : Privates) {
4000     if (P.second.isLocalPrivate())
4001       continue;
4002     QualType Ty = P.second.Original->getType().getNonReferenceType();
4003     if (Ty.isDestructedType())
4004       return true;
4005   }
4006   return false;
4007 }
4008 
4009 namespace {
4010 /// Loop generator for OpenMP iterator expression.
4011 class OMPIteratorGeneratorScope final
4012     : public CodeGenFunction::OMPPrivateScope {
4013   CodeGenFunction &CGF;
4014   const OMPIteratorExpr *E = nullptr;
4015   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4016   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4017   OMPIteratorGeneratorScope() = delete;
4018   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4019 
4020 public:
4021   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4022       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4023     if (!E)
4024       return;
4025     SmallVector<llvm::Value *, 4> Uppers;
4026     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4027       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4028       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4029       addPrivate(VD, [&CGF, VD]() {
4030         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4031       });
4032       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4033       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4034         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4035                                  "counter.addr");
4036       });
4037     }
4038     Privatize();
4039 
4040     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4041       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4042       LValue CLVal =
4043           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4044                              HelperData.CounterVD->getType());
4045       // Counter = 0;
4046       CGF.EmitStoreOfScalar(
4047           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4048           CLVal);
4049       CodeGenFunction::JumpDest &ContDest =
4050           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4051       CodeGenFunction::JumpDest &ExitDest =
4052           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4053       // N = <number-of_iterations>;
4054       llvm::Value *N = Uppers[I];
4055       // cont:
4056       // if (Counter < N) goto body; else goto exit;
4057       CGF.EmitBlock(ContDest.getBlock());
4058       auto *CVal =
4059           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4060       llvm::Value *Cmp =
4061           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4062               ? CGF.Builder.CreateICmpSLT(CVal, N)
4063               : CGF.Builder.CreateICmpULT(CVal, N);
4064       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4065       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4066       // body:
4067       CGF.EmitBlock(BodyBB);
4068       // Iteri = Begini + Counter * Stepi;
4069       CGF.EmitIgnoredExpr(HelperData.Update);
4070     }
4071   }
4072   ~OMPIteratorGeneratorScope() {
4073     if (!E)
4074       return;
4075     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4076       // Counter = Counter + 1;
4077       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4078       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4079       // goto cont;
4080       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4081       // exit:
4082       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4083     }
4084   }
4085 };
4086 } // namespace
4087 
4088 static std::pair<llvm::Value *, llvm::Value *>
4089 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4090   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4091   llvm::Value *Addr;
4092   if (OASE) {
4093     const Expr *Base = OASE->getBase();
4094     Addr = CGF.EmitScalarExpr(Base);
4095   } else {
4096     Addr = CGF.EmitLValue(E).getPointer(CGF);
4097   }
4098   llvm::Value *SizeVal;
4099   QualType Ty = E->getType();
4100   if (OASE) {
4101     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4102     for (const Expr *SE : OASE->getDimensions()) {
4103       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4104       Sz = CGF.EmitScalarConversion(
4105           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4106       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4107     }
4108   } else if (const auto *ASE =
4109                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4110     LValue UpAddrLVal =
4111         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4112     llvm::Value *UpAddr =
4113         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4114     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4115     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4116     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4117   } else {
4118     SizeVal = CGF.getTypeSize(Ty);
4119   }
4120   return std::make_pair(Addr, SizeVal);
4121 }
4122 
4123 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4124 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4125   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4126   if (KmpTaskAffinityInfoTy.isNull()) {
4127     RecordDecl *KmpAffinityInfoRD =
4128         C.buildImplicitRecord("kmp_task_affinity_info_t");
4129     KmpAffinityInfoRD->startDefinition();
4130     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4131     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4132     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4133     KmpAffinityInfoRD->completeDefinition();
4134     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4135   }
4136 }
4137 
4138 CGOpenMPRuntime::TaskResultTy
4139 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4140                               const OMPExecutableDirective &D,
4141                               llvm::Function *TaskFunction, QualType SharedsTy,
4142                               Address Shareds, const OMPTaskDataTy &Data) {
4143   ASTContext &C = CGM.getContext();
4144   llvm::SmallVector<PrivateDataTy, 4> Privates;
4145   // Aggregate privates and sort them by the alignment.
4146   const auto *I = Data.PrivateCopies.begin();
4147   for (const Expr *E : Data.PrivateVars) {
4148     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4149     Privates.emplace_back(
4150         C.getDeclAlign(VD),
4151         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4152                          /*PrivateElemInit=*/nullptr));
4153     ++I;
4154   }
4155   I = Data.FirstprivateCopies.begin();
4156   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4157   for (const Expr *E : Data.FirstprivateVars) {
4158     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4159     Privates.emplace_back(
4160         C.getDeclAlign(VD),
4161         PrivateHelpersTy(
4162             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4163             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4164     ++I;
4165     ++IElemInitRef;
4166   }
4167   I = Data.LastprivateCopies.begin();
4168   for (const Expr *E : Data.LastprivateVars) {
4169     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4170     Privates.emplace_back(
4171         C.getDeclAlign(VD),
4172         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4173                          /*PrivateElemInit=*/nullptr));
4174     ++I;
4175   }
4176   for (const VarDecl *VD : Data.PrivateLocals) {
4177     if (isAllocatableDecl(VD))
4178       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4179     else
4180       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4181   }
4182   llvm::stable_sort(Privates,
4183                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4184                       return L.first > R.first;
4185                     });
4186   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4187   // Build type kmp_routine_entry_t (if not built yet).
4188   emitKmpRoutineEntryT(KmpInt32Ty);
4189   // Build type kmp_task_t (if not built yet).
4190   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4191     if (SavedKmpTaskloopTQTy.isNull()) {
4192       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4193           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4194     }
4195     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4196   } else {
4197     assert((D.getDirectiveKind() == OMPD_task ||
4198             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4199             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4200            "Expected taskloop, task or target directive");
4201     if (SavedKmpTaskTQTy.isNull()) {
4202       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4203           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4204     }
4205     KmpTaskTQTy = SavedKmpTaskTQTy;
4206   }
4207   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4208   // Build particular struct kmp_task_t for the given task.
4209   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4210       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4211   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4212   QualType KmpTaskTWithPrivatesPtrQTy =
4213       C.getPointerType(KmpTaskTWithPrivatesQTy);
4214   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4215   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4216       KmpTaskTWithPrivatesTy->getPointerTo();
4217   llvm::Value *KmpTaskTWithPrivatesTySize =
4218       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4219   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4220 
4221   // Emit initial values for private copies (if any).
4222   llvm::Value *TaskPrivatesMap = nullptr;
4223   llvm::Type *TaskPrivatesMapTy =
4224       std::next(TaskFunction->arg_begin(), 3)->getType();
4225   if (!Privates.empty()) {
4226     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4227     TaskPrivatesMap =
4228         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4229     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4230         TaskPrivatesMap, TaskPrivatesMapTy);
4231   } else {
4232     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4233         cast<llvm::PointerType>(TaskPrivatesMapTy));
4234   }
4235   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4236   // kmp_task_t *tt);
4237   llvm::Function *TaskEntry = emitProxyTaskFunction(
4238       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4239       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4240       TaskPrivatesMap);
4241 
4242   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4243   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4244   // kmp_routine_entry_t *task_entry);
4245   // Task flags. Format is taken from
4246   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4247   // description of kmp_tasking_flags struct.
4248   enum {
4249     TiedFlag = 0x1,
4250     FinalFlag = 0x2,
4251     DestructorsFlag = 0x8,
4252     PriorityFlag = 0x20,
4253     DetachableFlag = 0x40,
4254   };
4255   unsigned Flags = Data.Tied ? TiedFlag : 0;
4256   bool NeedsCleanup = false;
4257   if (!Privates.empty()) {
4258     NeedsCleanup =
4259         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4260     if (NeedsCleanup)
4261       Flags = Flags | DestructorsFlag;
4262   }
4263   if (Data.Priority.getInt())
4264     Flags = Flags | PriorityFlag;
4265   if (D.hasClausesOfKind<OMPDetachClause>())
4266     Flags = Flags | DetachableFlag;
4267   llvm::Value *TaskFlags =
4268       Data.Final.getPointer()
4269           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4270                                      CGF.Builder.getInt32(FinalFlag),
4271                                      CGF.Builder.getInt32(/*C=*/0))
4272           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4273   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4274   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4275   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4276       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4277       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4278           TaskEntry, KmpRoutineEntryPtrTy)};
4279   llvm::Value *NewTask;
4280   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4281     // Check if we have any device clause associated with the directive.
4282     const Expr *Device = nullptr;
4283     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4284       Device = C->getDevice();
4285     // Emit device ID if any otherwise use default value.
4286     llvm::Value *DeviceID;
4287     if (Device)
4288       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4289                                            CGF.Int64Ty, /*isSigned=*/true);
4290     else
4291       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4292     AllocArgs.push_back(DeviceID);
4293     NewTask = CGF.EmitRuntimeCall(
4294         OMPBuilder.getOrCreateRuntimeFunction(
4295             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4296         AllocArgs);
4297   } else {
4298     NewTask =
4299         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4300                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4301                             AllocArgs);
4302   }
4303   // Emit detach clause initialization.
4304   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4305   // task_descriptor);
4306   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4307     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4308     LValue EvtLVal = CGF.EmitLValue(Evt);
4309 
4310     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4311     // int gtid, kmp_task_t *task);
4312     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4313     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4314     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4315     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4316         OMPBuilder.getOrCreateRuntimeFunction(
4317             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4318         {Loc, Tid, NewTask});
4319     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4320                                       Evt->getExprLoc());
4321     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4322   }
4323   // Process affinity clauses.
4324   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4325     // Process list of affinity data.
4326     ASTContext &C = CGM.getContext();
4327     Address AffinitiesArray = Address::invalid();
4328     // Calculate number of elements to form the array of affinity data.
4329     llvm::Value *NumOfElements = nullptr;
4330     unsigned NumAffinities = 0;
4331     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4332       if (const Expr *Modifier = C->getModifier()) {
4333         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4334         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4335           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4336           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4337           NumOfElements =
4338               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4339         }
4340       } else {
4341         NumAffinities += C->varlist_size();
4342       }
4343     }
4344     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4345     // Fields ids in kmp_task_affinity_info record.
4346     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4347 
4348     QualType KmpTaskAffinityInfoArrayTy;
4349     if (NumOfElements) {
4350       NumOfElements = CGF.Builder.CreateNUWAdd(
4351           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4352       OpaqueValueExpr OVE(
4353           Loc,
4354           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4355           VK_RValue);
4356       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4357                                                     RValue::get(NumOfElements));
4358       KmpTaskAffinityInfoArrayTy =
4359           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4360                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4361       // Properly emit variable-sized array.
4362       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4363                                            ImplicitParamDecl::Other);
4364       CGF.EmitVarDecl(*PD);
4365       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4366       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4367                                                 /*isSigned=*/false);
4368     } else {
4369       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4370           KmpTaskAffinityInfoTy,
4371           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4372           ArrayType::Normal, /*IndexTypeQuals=*/0);
4373       AffinitiesArray =
4374           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4375       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4376       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4377                                              /*isSigned=*/false);
4378     }
4379 
4380     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4381     // Fill array by elements without iterators.
4382     unsigned Pos = 0;
4383     bool HasIterator = false;
4384     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4385       if (C->getModifier()) {
4386         HasIterator = true;
4387         continue;
4388       }
4389       for (const Expr *E : C->varlists()) {
4390         llvm::Value *Addr;
4391         llvm::Value *Size;
4392         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4393         LValue Base =
4394             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4395                                KmpTaskAffinityInfoTy);
4396         // affs[i].base_addr = &<Affinities[i].second>;
4397         LValue BaseAddrLVal = CGF.EmitLValueForField(
4398             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4399         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4400                               BaseAddrLVal);
4401         // affs[i].len = sizeof(<Affinities[i].second>);
4402         LValue LenLVal = CGF.EmitLValueForField(
4403             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4404         CGF.EmitStoreOfScalar(Size, LenLVal);
4405         ++Pos;
4406       }
4407     }
4408     LValue PosLVal;
4409     if (HasIterator) {
4410       PosLVal = CGF.MakeAddrLValue(
4411           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4412           C.getSizeType());
4413       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4414     }
4415     // Process elements with iterators.
4416     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4417       const Expr *Modifier = C->getModifier();
4418       if (!Modifier)
4419         continue;
4420       OMPIteratorGeneratorScope IteratorScope(
4421           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4422       for (const Expr *E : C->varlists()) {
4423         llvm::Value *Addr;
4424         llvm::Value *Size;
4425         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4426         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4427         LValue Base = CGF.MakeAddrLValue(
4428             Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4429                     AffinitiesArray.getAlignment()),
4430             KmpTaskAffinityInfoTy);
4431         // affs[i].base_addr = &<Affinities[i].second>;
4432         LValue BaseAddrLVal = CGF.EmitLValueForField(
4433             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4434         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4435                               BaseAddrLVal);
4436         // affs[i].len = sizeof(<Affinities[i].second>);
4437         LValue LenLVal = CGF.EmitLValueForField(
4438             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4439         CGF.EmitStoreOfScalar(Size, LenLVal);
4440         Idx = CGF.Builder.CreateNUWAdd(
4441             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4442         CGF.EmitStoreOfScalar(Idx, PosLVal);
4443       }
4444     }
4445     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4446     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4447     // naffins, kmp_task_affinity_info_t *affin_list);
4448     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4449     llvm::Value *GTid = getThreadID(CGF, Loc);
4450     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4451         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4452     // FIXME: Emit the function and ignore its result for now unless the
4453     // runtime function is properly implemented.
4454     (void)CGF.EmitRuntimeCall(
4455         OMPBuilder.getOrCreateRuntimeFunction(
4456             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4457         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4458   }
4459   llvm::Value *NewTaskNewTaskTTy =
4460       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4461           NewTask, KmpTaskTWithPrivatesPtrTy);
4462   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4463                                                KmpTaskTWithPrivatesQTy);
4464   LValue TDBase =
4465       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4466   // Fill the data in the resulting kmp_task_t record.
4467   // Copy shareds if there are any.
4468   Address KmpTaskSharedsPtr = Address::invalid();
4469   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4470     KmpTaskSharedsPtr =
4471         Address(CGF.EmitLoadOfScalar(
4472                     CGF.EmitLValueForField(
4473                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4474                                            KmpTaskTShareds)),
4475                     Loc),
4476                 CGM.getNaturalTypeAlignment(SharedsTy));
4477     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4478     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4479     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4480   }
4481   // Emit initial values for private copies (if any).
4482   TaskResultTy Result;
4483   if (!Privates.empty()) {
4484     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4485                      SharedsTy, SharedsPtrTy, Data, Privates,
4486                      /*ForDup=*/false);
4487     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4488         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4489       Result.TaskDupFn = emitTaskDupFunction(
4490           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4491           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4492           /*WithLastIter=*/!Data.LastprivateVars.empty());
4493     }
4494   }
4495   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4496   enum { Priority = 0, Destructors = 1 };
4497   // Provide pointer to function with destructors for privates.
4498   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4499   const RecordDecl *KmpCmplrdataUD =
4500       (*FI)->getType()->getAsUnionType()->getDecl();
4501   if (NeedsCleanup) {
4502     llvm::Value *DestructorFn = emitDestructorsFunction(
4503         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4504         KmpTaskTWithPrivatesQTy);
4505     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4506     LValue DestructorsLV = CGF.EmitLValueForField(
4507         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4508     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4509                               DestructorFn, KmpRoutineEntryPtrTy),
4510                           DestructorsLV);
4511   }
4512   // Set priority.
4513   if (Data.Priority.getInt()) {
4514     LValue Data2LV = CGF.EmitLValueForField(
4515         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4516     LValue PriorityLV = CGF.EmitLValueForField(
4517         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4518     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4519   }
4520   Result.NewTask = NewTask;
4521   Result.TaskEntry = TaskEntry;
4522   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4523   Result.TDBase = TDBase;
4524   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4525   return Result;
4526 }
4527 
namespace {
/// Dependence kinds as they are stored in the flags field of a
/// kmp_depend_info entry.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences (both use the same value).
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4
};
/// Indices of the fields of the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  /// Base address of the dependence.
  BaseAddr,
  /// Length of the dependence.
  Len,
  /// Dependence kind flags.
  Flags
};
} // namespace
4538 
4539 /// Translates internal dependency kind into the runtime kind.
4540 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4541   RTLDependenceKindTy DepKind;
4542   switch (K) {
4543   case OMPC_DEPEND_in:
4544     DepKind = DepIn;
4545     break;
4546   // Out and InOut dependencies must use the same code.
4547   case OMPC_DEPEND_out:
4548   case OMPC_DEPEND_inout:
4549     DepKind = DepInOut;
4550     break;
4551   case OMPC_DEPEND_mutexinoutset:
4552     DepKind = DepMutexInOutSet;
4553     break;
4554   case OMPC_DEPEND_source:
4555   case OMPC_DEPEND_sink:
4556   case OMPC_DEPEND_depobj:
4557   case OMPC_DEPEND_unknown:
4558     llvm_unreachable("Unknown task dependence type");
4559   }
4560   return DepKind;
4561 }
4562 
4563 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4564 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4565                            QualType &FlagsTy) {
4566   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4567   if (KmpDependInfoTy.isNull()) {
4568     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4569     KmpDependInfoRD->startDefinition();
4570     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4571     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4572     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4573     KmpDependInfoRD->completeDefinition();
4574     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4575   }
4576 }
4577 
4578 std::pair<llvm::Value *, LValue>
4579 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4580                                    SourceLocation Loc) {
4581   ASTContext &C = CGM.getContext();
4582   QualType FlagsTy;
4583   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4584   RecordDecl *KmpDependInfoRD =
4585       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4586   LValue Base = CGF.EmitLoadOfPointerLValue(
4587       DepobjLVal.getAddress(CGF),
4588       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4589   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4590   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4591           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4592   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4593                             Base.getTBAAInfo());
4594   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4595       Addr.getPointer(),
4596       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4597   LValue NumDepsBase = CGF.MakeAddrLValue(
4598       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4599       Base.getBaseInfo(), Base.getTBAAInfo());
4600   // NumDeps = deps[i].base_addr;
4601   LValue BaseAddrLVal = CGF.EmitLValueForField(
4602       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4603   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4604   return std::make_pair(NumDeps, Base);
4605 }
4606 
4607 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4608                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4609                            const OMPTaskDataTy::DependData &Data,
4610                            Address DependenciesArray) {
4611   CodeGenModule &CGM = CGF.CGM;
4612   ASTContext &C = CGM.getContext();
4613   QualType FlagsTy;
4614   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4615   RecordDecl *KmpDependInfoRD =
4616       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4617   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4618 
4619   OMPIteratorGeneratorScope IteratorScope(
4620       CGF, cast_or_null<OMPIteratorExpr>(
4621                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4622                                  : nullptr));
4623   for (const Expr *E : Data.DepExprs) {
4624     llvm::Value *Addr;
4625     llvm::Value *Size;
4626     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4627     LValue Base;
4628     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4629       Base = CGF.MakeAddrLValue(
4630           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4631     } else {
4632       LValue &PosLVal = *Pos.get<LValue *>();
4633       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4634       Base = CGF.MakeAddrLValue(
4635           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4636                   DependenciesArray.getAlignment()),
4637           KmpDependInfoTy);
4638     }
4639     // deps[i].base_addr = &<Dependencies[i].second>;
4640     LValue BaseAddrLVal = CGF.EmitLValueForField(
4641         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4642     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4643                           BaseAddrLVal);
4644     // deps[i].len = sizeof(<Dependencies[i].second>);
4645     LValue LenLVal = CGF.EmitLValueForField(
4646         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4647     CGF.EmitStoreOfScalar(Size, LenLVal);
4648     // deps[i].flags = <Dependencies[i].first>;
4649     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4650     LValue FlagsLVal = CGF.EmitLValueForField(
4651         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4652     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4653                           FlagsLVal);
4654     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4655       ++(*P);
4656     } else {
4657       LValue &PosLVal = *Pos.get<LValue *>();
4658       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4659       Idx = CGF.Builder.CreateNUWAdd(Idx,
4660                                      llvm::ConstantInt::get(Idx->getType(), 1));
4661       CGF.EmitStoreOfScalar(Idx, PosLVal);
4662     }
4663   }
4664 }
4665 
4666 static SmallVector<llvm::Value *, 4>
4667 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4668                         const OMPTaskDataTy::DependData &Data) {
4669   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4670          "Expected depobj dependecy kind.");
4671   SmallVector<llvm::Value *, 4> Sizes;
4672   SmallVector<LValue, 4> SizeLVals;
4673   ASTContext &C = CGF.getContext();
4674   QualType FlagsTy;
4675   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4676   RecordDecl *KmpDependInfoRD =
4677       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4678   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4679   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4680   {
4681     OMPIteratorGeneratorScope IteratorScope(
4682         CGF, cast_or_null<OMPIteratorExpr>(
4683                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4684                                    : nullptr));
4685     for (const Expr *E : Data.DepExprs) {
4686       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4687       LValue Base = CGF.EmitLoadOfPointerLValue(
4688           DepobjLVal.getAddress(CGF),
4689           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4690       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4691           Base.getAddress(CGF), KmpDependInfoPtrT);
4692       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4693                                 Base.getTBAAInfo());
4694       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4695           Addr.getPointer(),
4696           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4697       LValue NumDepsBase = CGF.MakeAddrLValue(
4698           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4699           Base.getBaseInfo(), Base.getTBAAInfo());
4700       // NumDeps = deps[i].base_addr;
4701       LValue BaseAddrLVal = CGF.EmitLValueForField(
4702           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4703       llvm::Value *NumDeps =
4704           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4705       LValue NumLVal = CGF.MakeAddrLValue(
4706           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4707           C.getUIntPtrType());
4708       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4709                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4710       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4711       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4712       CGF.EmitStoreOfScalar(Add, NumLVal);
4713       SizeLVals.push_back(NumLVal);
4714     }
4715   }
4716   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4717     llvm::Value *Size =
4718         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4719     Sizes.push_back(Size);
4720   }
4721   return Sizes;
4722 }
4723 
4724 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4725                                LValue PosLVal,
4726                                const OMPTaskDataTy::DependData &Data,
4727                                Address DependenciesArray) {
4728   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4729          "Expected depobj dependecy kind.");
4730   ASTContext &C = CGF.getContext();
4731   QualType FlagsTy;
4732   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4733   RecordDecl *KmpDependInfoRD =
4734       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4735   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4736   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4737   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4738   {
4739     OMPIteratorGeneratorScope IteratorScope(
4740         CGF, cast_or_null<OMPIteratorExpr>(
4741                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4742                                    : nullptr));
4743     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4744       const Expr *E = Data.DepExprs[I];
4745       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4746       LValue Base = CGF.EmitLoadOfPointerLValue(
4747           DepobjLVal.getAddress(CGF),
4748           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4749       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4750           Base.getAddress(CGF), KmpDependInfoPtrT);
4751       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4752                                 Base.getTBAAInfo());
4753 
4754       // Get number of elements in a single depobj.
4755       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4756           Addr.getPointer(),
4757           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4758       LValue NumDepsBase = CGF.MakeAddrLValue(
4759           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4760           Base.getBaseInfo(), Base.getTBAAInfo());
4761       // NumDeps = deps[i].base_addr;
4762       LValue BaseAddrLVal = CGF.EmitLValueForField(
4763           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4764       llvm::Value *NumDeps =
4765           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4766 
4767       // memcopy dependency data.
4768       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4769           ElSize,
4770           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4771       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4772       Address DepAddr =
4773           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4774                   DependenciesArray.getAlignment());
4775       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4776 
4777       // Increase pos.
4778       // pos += size;
4779       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4780       CGF.EmitStoreOfScalar(Add, PosLVal);
4781     }
4782   }
4783 }
4784 
4785 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4786     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4787     SourceLocation Loc) {
4788   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4789         return D.DepExprs.empty();
4790       }))
4791     return std::make_pair(nullptr, Address::invalid());
4792   // Process list of dependencies.
4793   ASTContext &C = CGM.getContext();
4794   Address DependenciesArray = Address::invalid();
4795   llvm::Value *NumOfElements = nullptr;
4796   unsigned NumDependencies = std::accumulate(
4797       Dependencies.begin(), Dependencies.end(), 0,
4798       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4799         return D.DepKind == OMPC_DEPEND_depobj
4800                    ? V
4801                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4802       });
4803   QualType FlagsTy;
4804   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4805   bool HasDepobjDeps = false;
4806   bool HasRegularWithIterators = false;
4807   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4808   llvm::Value *NumOfRegularWithIterators =
4809       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4810   // Calculate number of depobj dependecies and regular deps with the iterators.
4811   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4812     if (D.DepKind == OMPC_DEPEND_depobj) {
4813       SmallVector<llvm::Value *, 4> Sizes =
4814           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4815       for (llvm::Value *Size : Sizes) {
4816         NumOfDepobjElements =
4817             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4818       }
4819       HasDepobjDeps = true;
4820       continue;
4821     }
4822     // Include number of iterations, if any.
4823     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4824       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4825         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4826         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4827         NumOfRegularWithIterators =
4828             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4829       }
4830       HasRegularWithIterators = true;
4831       continue;
4832     }
4833   }
4834 
4835   QualType KmpDependInfoArrayTy;
4836   if (HasDepobjDeps || HasRegularWithIterators) {
4837     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4838                                            /*isSigned=*/false);
4839     if (HasDepobjDeps) {
4840       NumOfElements =
4841           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4842     }
4843     if (HasRegularWithIterators) {
4844       NumOfElements =
4845           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4846     }
4847     OpaqueValueExpr OVE(Loc,
4848                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4849                         VK_RValue);
4850     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4851                                                   RValue::get(NumOfElements));
4852     KmpDependInfoArrayTy =
4853         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4854                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4855     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4856     // Properly emit variable-sized array.
4857     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4858                                          ImplicitParamDecl::Other);
4859     CGF.EmitVarDecl(*PD);
4860     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4861     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4862                                               /*isSigned=*/false);
4863   } else {
4864     KmpDependInfoArrayTy = C.getConstantArrayType(
4865         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4866         ArrayType::Normal, /*IndexTypeQuals=*/0);
4867     DependenciesArray =
4868         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4869     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4870     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4871                                            /*isSigned=*/false);
4872   }
4873   unsigned Pos = 0;
4874   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4875     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4876         Dependencies[I].IteratorExpr)
4877       continue;
4878     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4879                    DependenciesArray);
4880   }
4881   // Copy regular dependecies with iterators.
4882   LValue PosLVal = CGF.MakeAddrLValue(
4883       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4884   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4885   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4886     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4887         !Dependencies[I].IteratorExpr)
4888       continue;
4889     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4890                    DependenciesArray);
4891   }
4892   // Copy final depobj arrays without iterators.
4893   if (HasDepobjDeps) {
4894     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4895       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4896         continue;
4897       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4898                          DependenciesArray);
4899     }
4900   }
4901   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4902       DependenciesArray, CGF.VoidPtrTy);
4903   return std::make_pair(NumOfElements, DependenciesArray);
4904 }
4905 
4906 Address CGOpenMPRuntime::emitDepobjDependClause(
4907     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4908     SourceLocation Loc) {
4909   if (Dependencies.DepExprs.empty())
4910     return Address::invalid();
4911   // Process list of dependencies.
4912   ASTContext &C = CGM.getContext();
4913   Address DependenciesArray = Address::invalid();
4914   unsigned NumDependencies = Dependencies.DepExprs.size();
4915   QualType FlagsTy;
4916   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4917   RecordDecl *KmpDependInfoRD =
4918       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4919 
4920   llvm::Value *Size;
4921   // Define type kmp_depend_info[<Dependencies.size()>];
4922   // For depobj reserve one extra element to store the number of elements.
4923   // It is required to handle depobj(x) update(in) construct.
4924   // kmp_depend_info[<Dependencies.size()>] deps;
4925   llvm::Value *NumDepsVal;
4926   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4927   if (const auto *IE =
4928           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4929     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4930     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4931       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4932       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4933       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4934     }
4935     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4936                                     NumDepsVal);
4937     CharUnits SizeInBytes =
4938         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4939     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4940     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4941     NumDepsVal =
4942         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4943   } else {
4944     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4945         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4946         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4947     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4948     Size = CGM.getSize(Sz.alignTo(Align));
4949     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4950   }
4951   // Need to allocate on the dynamic memory.
4952   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4953   // Use default allocator.
4954   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4955   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4956 
4957   llvm::Value *Addr =
4958       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4959                               CGM.getModule(), OMPRTL___kmpc_alloc),
4960                           Args, ".dep.arr.addr");
4961   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4962       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4963   DependenciesArray = Address(Addr, Align);
4964   // Write number of elements in the first element of array for depobj.
4965   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4966   // deps[i].base_addr = NumDependencies;
4967   LValue BaseAddrLVal = CGF.EmitLValueForField(
4968       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4969   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4970   llvm::PointerUnion<unsigned *, LValue *> Pos;
4971   unsigned Idx = 1;
4972   LValue PosLVal;
4973   if (Dependencies.IteratorExpr) {
4974     PosLVal = CGF.MakeAddrLValue(
4975         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4976         C.getSizeType());
4977     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4978                           /*IsInit=*/true);
4979     Pos = &PosLVal;
4980   } else {
4981     Pos = &Idx;
4982   }
4983   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4984   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4985       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4986   return DependenciesArray;
4987 }
4988 
4989 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4990                                         SourceLocation Loc) {
4991   ASTContext &C = CGM.getContext();
4992   QualType FlagsTy;
4993   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4994   LValue Base = CGF.EmitLoadOfPointerLValue(
4995       DepobjLVal.getAddress(CGF),
4996       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4997   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4998   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4999       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5000   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5001       Addr.getPointer(),
5002       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5003   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5004                                                                CGF.VoidPtrTy);
5005   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5006   // Use default allocator.
5007   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5008   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5009 
5010   // _kmpc_free(gtid, addr, nullptr);
5011   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5012                                 CGM.getModule(), OMPRTL___kmpc_free),
5013                             Args);
5014 }
5015 
5016 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5017                                        OpenMPDependClauseKind NewDepKind,
5018                                        SourceLocation Loc) {
5019   ASTContext &C = CGM.getContext();
5020   QualType FlagsTy;
5021   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5022   RecordDecl *KmpDependInfoRD =
5023       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5024   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5025   llvm::Value *NumDeps;
5026   LValue Base;
5027   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5028 
5029   Address Begin = Base.getAddress(CGF);
5030   // Cast from pointer to array type to pointer to single element.
5031   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5032   // The basic structure here is a while-do loop.
5033   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5034   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5035   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5036   CGF.EmitBlock(BodyBB);
5037   llvm::PHINode *ElementPHI =
5038       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5039   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5040   Begin = Address(ElementPHI, Begin.getAlignment());
5041   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5042                             Base.getTBAAInfo());
5043   // deps[i].flags = NewDepKind;
5044   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5045   LValue FlagsLVal = CGF.EmitLValueForField(
5046       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5047   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5048                         FlagsLVal);
5049 
5050   // Shift the address forward by one element.
5051   Address ElementNext =
5052       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5053   ElementPHI->addIncoming(ElementNext.getPointer(),
5054                           CGF.Builder.GetInsertBlock());
5055   llvm::Value *IsEmpty =
5056       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5057   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5058   // Done.
5059   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5060 }
5061 
5062 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5063                                    const OMPExecutableDirective &D,
5064                                    llvm::Function *TaskFunction,
5065                                    QualType SharedsTy, Address Shareds,
5066                                    const Expr *IfCond,
5067                                    const OMPTaskDataTy &Data) {
5068   if (!CGF.HaveInsertPoint())
5069     return;
5070 
5071   TaskResultTy Result =
5072       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5073   llvm::Value *NewTask = Result.NewTask;
5074   llvm::Function *TaskEntry = Result.TaskEntry;
5075   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5076   LValue TDBase = Result.TDBase;
5077   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5078   // Process list of dependences.
5079   Address DependenciesArray = Address::invalid();
5080   llvm::Value *NumOfElements;
5081   std::tie(NumOfElements, DependenciesArray) =
5082       emitDependClause(CGF, Data.Dependences, Loc);
5083 
5084   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5085   // libcall.
5086   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5087   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5088   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5089   // list is not empty
5090   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5091   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5092   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5093   llvm::Value *DepTaskArgs[7];
5094   if (!Data.Dependences.empty()) {
5095     DepTaskArgs[0] = UpLoc;
5096     DepTaskArgs[1] = ThreadID;
5097     DepTaskArgs[2] = NewTask;
5098     DepTaskArgs[3] = NumOfElements;
5099     DepTaskArgs[4] = DependenciesArray.getPointer();
5100     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5101     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5102   }
5103   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5104                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5105     if (!Data.Tied) {
5106       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5107       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5108       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5109     }
5110     if (!Data.Dependences.empty()) {
5111       CGF.EmitRuntimeCall(
5112           OMPBuilder.getOrCreateRuntimeFunction(
5113               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5114           DepTaskArgs);
5115     } else {
5116       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5117                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5118                           TaskArgs);
5119     }
5120     // Check if parent region is untied and build return for untied task;
5121     if (auto *Region =
5122             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5123       Region->emitUntiedSwitch(CGF);
5124   };
5125 
5126   llvm::Value *DepWaitTaskArgs[6];
5127   if (!Data.Dependences.empty()) {
5128     DepWaitTaskArgs[0] = UpLoc;
5129     DepWaitTaskArgs[1] = ThreadID;
5130     DepWaitTaskArgs[2] = NumOfElements;
5131     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5132     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5133     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5134   }
5135   auto &M = CGM.getModule();
5136   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5137                         TaskEntry, &Data, &DepWaitTaskArgs,
5138                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5139     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5140     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5141     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5142     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5143     // is specified.
5144     if (!Data.Dependences.empty())
5145       CGF.EmitRuntimeCall(
5146           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5147           DepWaitTaskArgs);
5148     // Call proxy_task_entry(gtid, new_task);
5149     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5150                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5151       Action.Enter(CGF);
5152       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5153       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5154                                                           OutlinedFnArgs);
5155     };
5156 
5157     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5158     // kmp_task_t *new_task);
5159     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5160     // kmp_task_t *new_task);
5161     RegionCodeGenTy RCG(CodeGen);
5162     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5163                               M, OMPRTL___kmpc_omp_task_begin_if0),
5164                           TaskArgs,
5165                           OMPBuilder.getOrCreateRuntimeFunction(
5166                               M, OMPRTL___kmpc_omp_task_complete_if0),
5167                           TaskArgs);
5168     RCG.setAction(Action);
5169     RCG(CGF);
5170   };
5171 
5172   if (IfCond) {
5173     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5174   } else {
5175     RegionCodeGenTy ThenRCG(ThenCodeGen);
5176     ThenRCG(CGF);
5177   }
5178 }
5179 
5180 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5181                                        const OMPLoopDirective &D,
5182                                        llvm::Function *TaskFunction,
5183                                        QualType SharedsTy, Address Shareds,
5184                                        const Expr *IfCond,
5185                                        const OMPTaskDataTy &Data) {
5186   if (!CGF.HaveInsertPoint())
5187     return;
5188   TaskResultTy Result =
5189       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5190   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5191   // libcall.
5192   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5193   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5194   // sched, kmp_uint64 grainsize, void *task_dup);
5195   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5196   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5197   llvm::Value *IfVal;
5198   if (IfCond) {
5199     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5200                                       /*isSigned=*/true);
5201   } else {
5202     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5203   }
5204 
5205   LValue LBLVal = CGF.EmitLValueForField(
5206       Result.TDBase,
5207       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5208   const auto *LBVar =
5209       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5210   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5211                        LBLVal.getQuals(),
5212                        /*IsInitializer=*/true);
5213   LValue UBLVal = CGF.EmitLValueForField(
5214       Result.TDBase,
5215       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5216   const auto *UBVar =
5217       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5218   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5219                        UBLVal.getQuals(),
5220                        /*IsInitializer=*/true);
5221   LValue StLVal = CGF.EmitLValueForField(
5222       Result.TDBase,
5223       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5224   const auto *StVar =
5225       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5226   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5227                        StLVal.getQuals(),
5228                        /*IsInitializer=*/true);
5229   // Store reductions address.
5230   LValue RedLVal = CGF.EmitLValueForField(
5231       Result.TDBase,
5232       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5233   if (Data.Reductions) {
5234     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5235   } else {
5236     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5237                                CGF.getContext().VoidPtrTy);
5238   }
5239   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5240   llvm::Value *TaskArgs[] = {
5241       UpLoc,
5242       ThreadID,
5243       Result.NewTask,
5244       IfVal,
5245       LBLVal.getPointer(CGF),
5246       UBLVal.getPointer(CGF),
5247       CGF.EmitLoadOfScalar(StLVal, Loc),
5248       llvm::ConstantInt::getSigned(
5249           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5250       llvm::ConstantInt::getSigned(
5251           CGF.IntTy, Data.Schedule.getPointer()
5252                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5253                          : NoSchedule),
5254       Data.Schedule.getPointer()
5255           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5256                                       /*isSigned=*/false)
5257           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5258       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5259                              Result.TaskDupFn, CGF.VoidPtrTy)
5260                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5261   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5262                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5263                       TaskArgs);
5264 }
5265 
5266 /// Emit reduction operation for each element of array (required for
5267 /// array sections) LHS op = RHS.
5268 /// \param Type Type of array.
5269 /// \param LHSVar Variable on the left side of the reduction operation
5270 /// (references element of array in original variable).
5271 /// \param RHSVar Variable on the right side of the reduction operation
5272 /// (references element of array in original variable).
5273 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5274 /// RHSVar.
5275 static void EmitOMPAggregateReduction(
5276     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5277     const VarDecl *RHSVar,
5278     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5279                                   const Expr *, const Expr *)> &RedOpGen,
5280     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5281     const Expr *UpExpr = nullptr) {
5282   // Perform element-by-element initialization.
5283   QualType ElementTy;
5284   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5285   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5286 
5287   // Drill down to the base element type on both arrays.
5288   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5289   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5290 
5291   llvm::Value *RHSBegin = RHSAddr.getPointer();
5292   llvm::Value *LHSBegin = LHSAddr.getPointer();
5293   // Cast from pointer to array type to pointer to single element.
5294   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5295   // The basic structure here is a while-do loop.
5296   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5297   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5298   llvm::Value *IsEmpty =
5299       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5300   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5301 
5302   // Enter the loop body, making that address the current address.
5303   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5304   CGF.EmitBlock(BodyBB);
5305 
5306   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5307 
5308   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5309       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5310   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5311   Address RHSElementCurrent =
5312       Address(RHSElementPHI,
5313               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5314 
5315   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5316       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5317   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5318   Address LHSElementCurrent =
5319       Address(LHSElementPHI,
5320               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5321 
5322   // Emit copy.
5323   CodeGenFunction::OMPPrivateScope Scope(CGF);
5324   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5325   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5326   Scope.Privatize();
5327   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5328   Scope.ForceCleanup();
5329 
5330   // Shift the address forward by one element.
5331   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5332       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5333   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5334       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5335   // Check whether we've reached the end.
5336   llvm::Value *Done =
5337       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5338   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5339   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5340   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5341 
5342   // Done.
5343   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5344 }
5345 
5346 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5347 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5348 /// UDR combiner function.
5349 static void emitReductionCombiner(CodeGenFunction &CGF,
5350                                   const Expr *ReductionOp) {
5351   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5352     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5353       if (const auto *DRE =
5354               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5355         if (const auto *DRD =
5356                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5357           std::pair<llvm::Function *, llvm::Function *> Reduction =
5358               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5359           RValue Func = RValue::get(Reduction.first);
5360           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5361           CGF.EmitIgnoredExpr(ReductionOp);
5362           return;
5363         }
5364   CGF.EmitIgnoredExpr(ReductionOp);
5365 }
5366 
5367 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5368     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5369     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5370     ArrayRef<const Expr *> ReductionOps) {
5371   ASTContext &C = CGM.getContext();
5372 
5373   // void reduction_func(void *LHSArg, void *RHSArg);
5374   FunctionArgList Args;
5375   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5376                            ImplicitParamDecl::Other);
5377   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5378                            ImplicitParamDecl::Other);
5379   Args.push_back(&LHSArg);
5380   Args.push_back(&RHSArg);
5381   const auto &CGFI =
5382       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5383   std::string Name = getName({"omp", "reduction", "reduction_func"});
5384   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5385                                     llvm::GlobalValue::InternalLinkage, Name,
5386                                     &CGM.getModule());
5387   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5388   Fn->setDoesNotRecurse();
5389   CodeGenFunction CGF(CGM);
5390   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5391 
5392   // Dst = (void*[n])(LHSArg);
5393   // Src = (void*[n])(RHSArg);
5394   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5395       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5396       ArgsType), CGF.getPointerAlign());
5397   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5398       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5399       ArgsType), CGF.getPointerAlign());
5400 
5401   //  ...
5402   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5403   //  ...
5404   CodeGenFunction::OMPPrivateScope Scope(CGF);
5405   auto IPriv = Privates.begin();
5406   unsigned Idx = 0;
5407   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5408     const auto *RHSVar =
5409         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5410     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5411       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5412     });
5413     const auto *LHSVar =
5414         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5415     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5416       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5417     });
5418     QualType PrivTy = (*IPriv)->getType();
5419     if (PrivTy->isVariablyModifiedType()) {
5420       // Get array size and emit VLA type.
5421       ++Idx;
5422       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5423       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5424       const VariableArrayType *VLA =
5425           CGF.getContext().getAsVariableArrayType(PrivTy);
5426       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5427       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5428           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5429       CGF.EmitVariablyModifiedType(PrivTy);
5430     }
5431   }
5432   Scope.Privatize();
5433   IPriv = Privates.begin();
5434   auto ILHS = LHSExprs.begin();
5435   auto IRHS = RHSExprs.begin();
5436   for (const Expr *E : ReductionOps) {
5437     if ((*IPriv)->getType()->isArrayType()) {
5438       // Emit reduction for array section.
5439       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5440       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5441       EmitOMPAggregateReduction(
5442           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5443           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5444             emitReductionCombiner(CGF, E);
5445           });
5446     } else {
5447       // Emit reduction for array subscript or single variable.
5448       emitReductionCombiner(CGF, E);
5449     }
5450     ++IPriv;
5451     ++ILHS;
5452     ++IRHS;
5453   }
5454   Scope.ForceCleanup();
5455   CGF.FinishFunction();
5456   return Fn;
5457 }
5458 
5459 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5460                                                   const Expr *ReductionOp,
5461                                                   const Expr *PrivateRef,
5462                                                   const DeclRefExpr *LHS,
5463                                                   const DeclRefExpr *RHS) {
5464   if (PrivateRef->getType()->isArrayType()) {
5465     // Emit reduction for array section.
5466     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5467     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5468     EmitOMPAggregateReduction(
5469         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5470         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5471           emitReductionCombiner(CGF, ReductionOp);
5472         });
5473   } else {
5474     // Emit reduction for array subscript or single variable.
5475     emitReductionCombiner(CGF, ReductionOp);
5476   }
5477 }
5478 
// Emits the code that combines the private reduction copies into the
// original items. Does nothing when CGF has no insert point. With
// Options.SimpleReduction only the combiners are emitted inline; otherwise a
// __kmpc_reduce{_nowait} call is emitted followed by a switch on its result
// (case 1: plain combiners, case 2: atomic/critical combiners), as laid out
// in the comment inside the body.
//
// Privates, LHSExprs, RHSExprs and ReductionOps are walked in lock step, one
// entry per reduction item; LHSExprs/RHSExprs entries are cast to
// DeclRefExpr.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime call: emit every combiner inline inside its own cleanup
    // scope and stop.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Size counts array slots: one per RHS expression plus one extra slot for
  // each variably modified private.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot being written; it advances twice for variably modified
  // items (pointer slot, then size slot).
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is cast to size_t and smuggled through the
      // void* slot via inttoptr.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note: <n> is the number of reduction items (RHSExprs.size()), not the
  // number of RedList slots; sizeof(RedList) covers the extra size slots.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the same per-item combiner emission as the
  // SimpleReduction path.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is built without an enter callee (nullptr / llvm::None) and
  // with the end-reduce runtime function plus EndArgs; presumably this makes
  // the end call an exit action of the region -- see CommonActionTy.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2. Each combiner of the form 'x = <update>' is
  // handed to EmitOMPAtomicSimpleUpdateExpr; everything else is emitted inside
  // a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is the initial value; it is only replaced when the update
      // expression turns out to be a binary operator (see below).
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer 'BO' declared in this condition shadows the opcode
      // 'BO' above for the extent of the if statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Generator for one atomic update. The callback passed to
        // EmitOMPAtomicSimpleUpdateExpr stores the supplied value of X into a
        // temporary, remaps VD to that temporary and evaluates UpExpr.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The three Expr parameters are unused; the signature only has to
        // match what EmitOMPAggregateReduction expects of its generator.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // This local EndArgs shadows the identical array declared for case 1.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // nowait: the atomic region is emitted without an end-reduce action.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5785 
5786 /// Generates unique name for artificial threadprivate variables.
5787 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5788 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5789                                       const Expr *Ref) {
5790   SmallString<256> Buffer;
5791   llvm::raw_svector_ostream Out(Buffer);
5792   const clang::DeclRefExpr *DE;
5793   const VarDecl *D = ::getBaseDecl(Ref, DE);
5794   if (!D)
5795     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5796   D = D->getCanonicalDecl();
5797   std::string Name = CGM.getOpenMPRuntime().getName(
5798       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5799   Out << Prefix << Name << "_"
5800       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5801   return std::string(Out.str());
5802 }
5803 
5804 /// Emits reduction initializer function:
5805 /// \code
5806 /// void @.red_init(void* %arg, void* %orig) {
5807 /// %0 = bitcast void* %arg to <type>*
5808 /// store <type> <init>, <type>* %0
5809 /// ret void
5810 /// }
5811 /// \endcode
5812 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5813                                            SourceLocation Loc,
5814                                            ReductionCodeGen &RCG, unsigned N) {
5815   ASTContext &C = CGM.getContext();
5816   QualType VoidPtrTy = C.VoidPtrTy;
5817   VoidPtrTy.addRestrict();
5818   FunctionArgList Args;
5819   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5820                           ImplicitParamDecl::Other);
5821   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5822                               ImplicitParamDecl::Other);
5823   Args.emplace_back(&Param);
5824   Args.emplace_back(&ParamOrig);
5825   const auto &FnInfo =
5826       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5827   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5828   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5829   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5830                                     Name, &CGM.getModule());
5831   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5832   Fn->setDoesNotRecurse();
5833   CodeGenFunction CGF(CGM);
5834   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5835   Address PrivateAddr = CGF.EmitLoadOfPointer(
5836       CGF.GetAddrOfLocalVar(&Param),
5837       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5838   llvm::Value *Size = nullptr;
5839   // If the size of the reduction item is non-constant, load it from global
5840   // threadprivate variable.
5841   if (RCG.getSizes(N).second) {
5842     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5843         CGF, CGM.getContext().getSizeType(),
5844         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5845     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5846                                 CGM.getContext().getSizeType(), Loc);
5847   }
5848   RCG.emitAggregateType(CGF, N, Size);
5849   LValue OrigLVal;
5850   // If initializer uses initializer from declare reduction construct, emit a
5851   // pointer to the address of the original reduction item (reuired by reduction
5852   // initializer)
5853   if (RCG.usesReductionInitializer(N)) {
5854     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5855     SharedAddr = CGF.EmitLoadOfPointer(
5856         SharedAddr,
5857         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5858     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5859   } else {
5860     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5861         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5862         CGM.getContext().VoidPtrTy);
5863   }
5864   // Emit the initializer:
5865   // %0 = bitcast void* %arg to <type>*
5866   // store <type> <init>, <type>* %0
5867   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5868                          [](CodeGenFunction &) { return false; });
5869   CGF.FinishFunction();
5870   return Fn;
5871 }
5872 
5873 /// Emits reduction combiner function:
5874 /// \code
5875 /// void @.red_comb(void* %arg0, void* %arg1) {
5876 /// %lhs = bitcast void* %arg0 to <type>*
5877 /// %rhs = bitcast void* %arg1 to <type>*
5878 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5879 /// store <type> %2, <type>* %lhs
5880 /// ret void
5881 /// }
5882 /// \endcode
5883 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5884                                            SourceLocation Loc,
5885                                            ReductionCodeGen &RCG, unsigned N,
5886                                            const Expr *ReductionOp,
5887                                            const Expr *LHS, const Expr *RHS,
5888                                            const Expr *PrivateRef) {
5889   ASTContext &C = CGM.getContext();
5890   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5891   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5892   FunctionArgList Args;
5893   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5894                                C.VoidPtrTy, ImplicitParamDecl::Other);
5895   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5896                             ImplicitParamDecl::Other);
5897   Args.emplace_back(&ParamInOut);
5898   Args.emplace_back(&ParamIn);
5899   const auto &FnInfo =
5900       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5901   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5902   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5903   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5904                                     Name, &CGM.getModule());
5905   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5906   Fn->setDoesNotRecurse();
5907   CodeGenFunction CGF(CGM);
5908   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5909   llvm::Value *Size = nullptr;
5910   // If the size of the reduction item is non-constant, load it from global
5911   // threadprivate variable.
5912   if (RCG.getSizes(N).second) {
5913     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5914         CGF, CGM.getContext().getSizeType(),
5915         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5916     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5917                                 CGM.getContext().getSizeType(), Loc);
5918   }
5919   RCG.emitAggregateType(CGF, N, Size);
5920   // Remap lhs and rhs variables to the addresses of the function arguments.
5921   // %lhs = bitcast void* %arg0 to <type>*
5922   // %rhs = bitcast void* %arg1 to <type>*
5923   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5924   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5925     // Pull out the pointer to the variable.
5926     Address PtrAddr = CGF.EmitLoadOfPointer(
5927         CGF.GetAddrOfLocalVar(&ParamInOut),
5928         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5929     return CGF.Builder.CreateElementBitCast(
5930         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5931   });
5932   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5933     // Pull out the pointer to the variable.
5934     Address PtrAddr = CGF.EmitLoadOfPointer(
5935         CGF.GetAddrOfLocalVar(&ParamIn),
5936         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5937     return CGF.Builder.CreateElementBitCast(
5938         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5939   });
5940   PrivateScope.Privatize();
5941   // Emit the combiner body:
5942   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5943   // store <type> %2, <type>* %lhs
5944   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5945       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5946       cast<DeclRefExpr>(RHS));
5947   CGF.FinishFunction();
5948   return Fn;
5949 }
5950 
5951 /// Emits reduction finalizer function:
5952 /// \code
5953 /// void @.red_fini(void* %arg) {
5954 /// %0 = bitcast void* %arg to <type>*
5955 /// <destroy>(<type>* %0)
5956 /// ret void
5957 /// }
5958 /// \endcode
5959 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5960                                            SourceLocation Loc,
5961                                            ReductionCodeGen &RCG, unsigned N) {
5962   if (!RCG.needCleanups(N))
5963     return nullptr;
5964   ASTContext &C = CGM.getContext();
5965   FunctionArgList Args;
5966   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5967                           ImplicitParamDecl::Other);
5968   Args.emplace_back(&Param);
5969   const auto &FnInfo =
5970       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5971   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5972   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5973   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5974                                     Name, &CGM.getModule());
5975   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5976   Fn->setDoesNotRecurse();
5977   CodeGenFunction CGF(CGM);
5978   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5979   Address PrivateAddr = CGF.EmitLoadOfPointer(
5980       CGF.GetAddrOfLocalVar(&Param),
5981       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5982   llvm::Value *Size = nullptr;
5983   // If the size of the reduction item is non-constant, load it from global
5984   // threadprivate variable.
5985   if (RCG.getSizes(N).second) {
5986     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5987         CGF, CGM.getContext().getSizeType(),
5988         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5989     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5990                                 CGM.getContext().getSizeType(), Loc);
5991   }
5992   RCG.emitAggregateType(CGF, N, Size);
5993   // Emit the finalizer body:
5994   // <destroy>(<type>* %0)
5995   RCG.emitCleanups(CGF, N, PrivateAddr);
5996   CGF.FinishFunction(Loc);
5997   return Fn;
5998 }
5999 
/// Emits the array of per-item reduction descriptors for a task reduction and
/// passes it to the OpenMP runtime.
///
/// One kmp_taskred_input_t record is filled in for every entry of
/// \p Data.ReductionVars (shared item, original item, size, init/fini/comb
/// routines and flags). The array lives in a memory temporary and is handed to
/// __kmpc_taskred_modifier_init when \p Data.IsReductionWithTaskMod is set, or
/// to __kmpc_taskred_init otherwise.
///
/// \p LHSExprs and \p RHSExprs are indexed in parallel with the reduction
/// variables when the combiner functions are emitted.
///
/// \returns the result of the emitted runtime call, or nullptr if there is no
/// insertion point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // The fields are added in the order listed in the struct sketch above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size of the item in chars>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null finalizer is stored when no finalizer function was emitted.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6128 
6129 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6130                                             SourceLocation Loc,
6131                                             bool IsWorksharingReduction) {
6132   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6133   // is_ws, int num, void *data);
6134   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6135   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6136                                                 CGM.IntTy, /*isSigned=*/true);
6137   llvm::Value *Args[] = {IdentTLoc, GTid,
6138                          llvm::ConstantInt::get(CGM.IntTy,
6139                                                 IsWorksharingReduction ? 1 : 0,
6140                                                 /*isSigned=*/true)};
6141   (void)CGF.EmitRuntimeCall(
6142       OMPBuilder.getOrCreateRuntimeFunction(
6143           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6144       Args);
6145 }
6146 
6147 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6148                                               SourceLocation Loc,
6149                                               ReductionCodeGen &RCG,
6150                                               unsigned N) {
6151   auto Sizes = RCG.getSizes(N);
6152   // Emit threadprivate global variable if the type is non-constant
6153   // (Sizes.second = nullptr).
6154   if (Sizes.second) {
6155     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6156                                                      /*isSigned=*/false);
6157     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6158         CGF, CGM.getContext().getSizeType(),
6159         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6160     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6161   }
6162 }
6163 
6164 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6165                                               SourceLocation Loc,
6166                                               llvm::Value *ReductionsPtr,
6167                                               LValue SharedLVal) {
6168   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6169   // *d);
6170   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6171                                                    CGM.IntTy,
6172                                                    /*isSigned=*/true),
6173                          ReductionsPtr,
6174                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6175                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6176   return Address(
6177       CGF.EmitRuntimeCall(
6178           OMPBuilder.getOrCreateRuntimeFunction(
6179               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6180           Args),
6181       SharedLVal.getAlignment());
6182 }
6183 
6184 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6185                                        SourceLocation Loc) {
6186   if (!CGF.HaveInsertPoint())
6187     return;
6188 
6189   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6190     OMPBuilder.CreateTaskwait(CGF.Builder);
6191   } else {
6192     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6193     // global_tid);
6194     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6195     // Ignore return result until untied tasks are supported.
6196     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6197                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6198                         Args);
6199   }
6200 
6201   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6202     Region->emitUntiedSwitch(CGF);
6203 }
6204 
6205 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6206                                            OpenMPDirectiveKind InnerKind,
6207                                            const RegionCodeGenTy &CodeGen,
6208                                            bool HasCancel) {
6209   if (!CGF.HaveInsertPoint())
6210     return;
6211   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6212   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6213 }
6214 
namespace {
/// Cancellation kinds. getCancellationKind() maps a directive kind to one of
/// these values, and the result is passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Initial value only; getCancellationKind() never returns it.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6224 
6225 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6226   RTCancelKind CancelKind = CancelNoreq;
6227   if (CancelRegion == OMPD_parallel)
6228     CancelKind = CancelParallel;
6229   else if (CancelRegion == OMPD_for)
6230     CancelKind = CancelLoop;
6231   else if (CancelRegion == OMPD_sections)
6232     CancelKind = CancelSections;
6233   else {
6234     assert(CancelRegion == OMPD_taskgroup);
6235     CancelKind = CancelTaskgroup;
6236   }
6237   return CancelKind;
6238 }
6239 
6240 void CGOpenMPRuntime::emitCancellationPointCall(
6241     CodeGenFunction &CGF, SourceLocation Loc,
6242     OpenMPDirectiveKind CancelRegion) {
6243   if (!CGF.HaveInsertPoint())
6244     return;
6245   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6246   // global_tid, kmp_int32 cncl_kind);
6247   if (auto *OMPRegionInfo =
6248           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6249     // For 'cancellation point taskgroup', the task region info may not have a
6250     // cancel. This may instead happen in another adjacent task.
6251     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6252       llvm::Value *Args[] = {
6253           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6254           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6255       // Ignore return result until untied tasks are supported.
6256       llvm::Value *Result = CGF.EmitRuntimeCall(
6257           OMPBuilder.getOrCreateRuntimeFunction(
6258               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6259           Args);
6260       // if (__kmpc_cancellationpoint()) {
6261       //   exit from construct;
6262       // }
6263       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6264       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6265       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6266       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6267       CGF.EmitBlock(ExitBB);
6268       // exit from construct;
6269       CodeGenFunction::JumpDest CancelDest =
6270           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6271       CGF.EmitBranchThroughCleanup(CancelDest);
6272       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6273     }
6274   }
6275 }
6276 
6277 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6278                                      const Expr *IfCond,
6279                                      OpenMPDirectiveKind CancelRegion) {
6280   if (!CGF.HaveInsertPoint())
6281     return;
6282   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6283   // kmp_int32 cncl_kind);
6284   auto &M = CGM.getModule();
6285   if (auto *OMPRegionInfo =
6286           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6287     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6288                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6289       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6290       llvm::Value *Args[] = {
6291           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6292           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6293       // Ignore return result until untied tasks are supported.
6294       llvm::Value *Result = CGF.EmitRuntimeCall(
6295           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6296       // if (__kmpc_cancel()) {
6297       //   exit from construct;
6298       // }
6299       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6300       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6301       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6302       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6303       CGF.EmitBlock(ExitBB);
6304       // exit from construct;
6305       CodeGenFunction::JumpDest CancelDest =
6306           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6307       CGF.EmitBranchThroughCleanup(CancelDest);
6308       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6309     };
6310     if (IfCond) {
6311       emitIfClause(CGF, IfCond, ThenGen,
6312                    [](CodeGenFunction &, PrePostActionTy &) {});
6313     } else {
6314       RegionCodeGenTy ThenRCG(ThenGen);
6315       ThenRCG(CGF);
6316     }
6317   }
6318 }
6319 
6320 namespace {
6321 /// Cleanup action for uses_allocators support.
6322 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6323   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6324 
6325 public:
6326   OMPUsesAllocatorsActionTy(
6327       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6328       : Allocators(Allocators) {}
6329   void Enter(CodeGenFunction &CGF) override {
6330     if (!CGF.HaveInsertPoint())
6331       return;
6332     for (const auto &AllocatorData : Allocators) {
6333       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6334           CGF, AllocatorData.first, AllocatorData.second);
6335     }
6336   }
6337   void Exit(CodeGenFunction &CGF) override {
6338     if (!CGF.HaveInsertPoint())
6339       return;
6340     for (const auto &AllocatorData : Allocators) {
6341       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6342                                                         AllocatorData.first);
6343     }
6344   }
6345 };
6346 } // namespace
6347 
6348 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6349     const OMPExecutableDirective &D, StringRef ParentName,
6350     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6351     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6352   assert(!ParentName.empty() && "Invalid target region parent name!");
6353   HasEmittedTargetRegion = true;
6354   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6355   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6356     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6357       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6358       if (!D.AllocatorTraits)
6359         continue;
6360       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6361     }
6362   }
6363   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6364   CodeGen.setAction(UsesAllocatorAction);
6365   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6366                                    IsOffloadEntry, CodeGen);
6367 }
6368 
6369 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6370                                              const Expr *Allocator,
6371                                              const Expr *AllocatorTraits) {
6372   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6373   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6374   // Use default memspace handle.
6375   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6376   llvm::Value *NumTraits = llvm::ConstantInt::get(
6377       CGF.IntTy, cast<ConstantArrayType>(
6378                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6379                      ->getSize()
6380                      .getLimitedValue());
6381   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6382   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6383       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6384   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6385                                            AllocatorTraitsLVal.getBaseInfo(),
6386                                            AllocatorTraitsLVal.getTBAAInfo());
6387   llvm::Value *Traits =
6388       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6389 
6390   llvm::Value *AllocatorVal =
6391       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6392                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6393                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6394   // Store to allocator.
6395   CGF.EmitVarDecl(*cast<VarDecl>(
6396       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6397   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6398   AllocatorVal =
6399       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6400                                Allocator->getType(), Allocator->getExprLoc());
6401   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6402 }
6403 
6404 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6405                                              const Expr *Allocator) {
6406   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6407   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6408   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6409   llvm::Value *AllocatorVal =
6410       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6411   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6412                                           CGF.getContext().VoidPtrTy,
6413                                           Allocator->getExprLoc());
6414   (void)CGF.EmitRuntimeCall(
6415       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6416                                             OMPRTL___kmpc_destroy_allocator),
6417       {ThreadId, AllocatorVal});
6418 }
6419 
6420 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6421     const OMPExecutableDirective &D, StringRef ParentName,
6422     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6423     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6424   // Create a unique name for the entry function using the source location
6425   // information of the current target region. The name will be something like:
6426   //
6427   // __omp_offloading_DD_FFFF_PP_lBB
6428   //
6429   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6430   // mangled name of the function that encloses the target region and BB is the
6431   // line number of the target region.
6432 
6433   unsigned DeviceID;
6434   unsigned FileID;
6435   unsigned Line;
6436   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6437                            Line);
6438   SmallString<64> EntryFnName;
6439   {
6440     llvm::raw_svector_ostream OS(EntryFnName);
6441     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6442        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6443   }
6444 
6445   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6446 
6447   CodeGenFunction CGF(CGM, true);
6448   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6449   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6450 
6451   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6452 
6453   // If this target outline function is not an offload entry, we don't need to
6454   // register it.
6455   if (!IsOffloadEntry)
6456     return;
6457 
6458   // The target region ID is used by the runtime library to identify the current
6459   // target region, so it only has to be unique and not necessarily point to
6460   // anything. It could be the pointer to the outlined function that implements
6461   // the target region, but we aren't using that so that the compiler doesn't
6462   // need to keep that, and could therefore inline the host function if proven
6463   // worthwhile during optimization. In the other hand, if emitting code for the
6464   // device, the ID has to be the function address so that it can retrieved from
6465   // the offloading entry and launched by the runtime library. We also mark the
6466   // outlined function to have external linkage in case we are emitting code for
6467   // the device, because these functions will be entry points to the device.
6468 
6469   if (CGM.getLangOpts().OpenMPIsDevice) {
6470     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6471     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6472     OutlinedFn->setDSOLocal(false);
6473   } else {
6474     std::string Name = getName({EntryFnName, "region_id"});
6475     OutlinedFnID = new llvm::GlobalVariable(
6476         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6477         llvm::GlobalValue::WeakAnyLinkage,
6478         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6479   }
6480 
6481   // Register the information for the entry associated with this target region.
6482   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6483       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6484       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6485 }
6486 
6487 /// Checks if the expression is constant or does not have non-trivial function
6488 /// calls.
6489 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6490   // We can skip constant expressions.
6491   // We can skip expressions with trivial calls or simple expressions.
6492   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6493           !E->hasNonTrivialCall(Ctx)) &&
6494          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6495 }
6496 
6497 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6498                                                     const Stmt *Body) {
6499   const Stmt *Child = Body->IgnoreContainers();
6500   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6501     Child = nullptr;
6502     for (const Stmt *S : C->body()) {
6503       if (const auto *E = dyn_cast<Expr>(S)) {
6504         if (isTrivial(Ctx, E))
6505           continue;
6506       }
6507       // Some of the statements can be ignored.
6508       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6509           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6510         continue;
6511       // Analyze declarations.
6512       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6513         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6514               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6515                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6516                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6517                   isa<UsingDirectiveDecl>(D) ||
6518                   isa<OMPDeclareReductionDecl>(D) ||
6519                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6520                 return true;
6521               const auto *VD = dyn_cast<VarDecl>(D);
6522               if (!VD)
6523                 return false;
6524               return VD->isConstexpr() ||
6525                      ((VD->getType().isTrivialType(Ctx) ||
6526                        VD->getType()->isReferenceType()) &&
6527                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6528             }))
6529           continue;
6530       }
6531       // Found multiple children - cannot get the one child only.
6532       if (Child)
6533         return nullptr;
6534       Child = S;
6535     }
6536     if (Child)
6537       Child = Child->IgnoreContainers();
6538   }
6539   return Child;
6540 }
6541 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit the constant 1 (a single team) for directives such as
/// 'target parallel' that have no associated teams construct.
///
/// Emit the constant 0 when a teams construct is present but has no num_teams
/// clause, or when a plain 'target' directly encloses a single directive that
/// is neither teams-based nor parallel/simd-based.
///
/// Otherwise (a plain 'target' whose body does not reduce to a single nested
/// OpenMP executable directive), return nullptr.
///
/// Every non-null result is a 32-bit integer value. Must only be called when
/// compiling for the host and with a target execution directive.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single OpenMP directive nested in its body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the num_teams expression of the nested teams directive
          // with the captured-statement info of the enclosing target region
          // installed.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested directive.
      return Bld.getInt32(0);
    }
    // The body is not a single nested OpenMP executable directive.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives: the num_teams clause, if any, is on
    // the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct is associated with these directives: a single team.
    return Bld.getInt32(1);
  // Every other directive kind is unexpected here and falls through to the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6674 
6675 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6676                                   llvm::Value *DefaultThreadLimitVal) {
6677   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6678       CGF.getContext(), CS->getCapturedStmt());
6679   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6680     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6681       llvm::Value *NumThreads = nullptr;
6682       llvm::Value *CondVal = nullptr;
6683       // Handle if clause. If if clause present, the number of threads is
6684       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6685       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6686         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6687         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6688         const OMPIfClause *IfClause = nullptr;
6689         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6690           if (C->getNameModifier() == OMPD_unknown ||
6691               C->getNameModifier() == OMPD_parallel) {
6692             IfClause = C;
6693             break;
6694           }
6695         }
6696         if (IfClause) {
6697           const Expr *Cond = IfClause->getCondition();
6698           bool Result;
6699           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6700             if (!Result)
6701               return CGF.Builder.getInt32(1);
6702           } else {
6703             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6704             if (const auto *PreInit =
6705                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6706               for (const auto *I : PreInit->decls()) {
6707                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6708                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6709                 } else {
6710                   CodeGenFunction::AutoVarEmission Emission =
6711                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6712                   CGF.EmitAutoVarCleanups(Emission);
6713                 }
6714               }
6715             }
6716             CondVal = CGF.EvaluateExprAsBool(Cond);
6717           }
6718         }
6719       }
6720       // Check the value of num_threads clause iff if clause was not specified
6721       // or is not evaluated to false.
6722       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6723         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6724         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6725         const auto *NumThreadsClause =
6726             Dir->getSingleClause<OMPNumThreadsClause>();
6727         CodeGenFunction::LexicalScope Scope(
6728             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6729         if (const auto *PreInit =
6730                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6731           for (const auto *I : PreInit->decls()) {
6732             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6733               CGF.EmitVarDecl(cast<VarDecl>(*I));
6734             } else {
6735               CodeGenFunction::AutoVarEmission Emission =
6736                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6737               CGF.EmitAutoVarCleanups(Emission);
6738             }
6739           }
6740         }
6741         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6742         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6743                                                /*isSigned=*/false);
6744         if (DefaultThreadLimitVal)
6745           NumThreads = CGF.Builder.CreateSelect(
6746               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6747               DefaultThreadLimitVal, NumThreads);
6748       } else {
6749         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6750                                            : CGF.Builder.getInt32(0);
6751       }
6752       // Process condition of the if clause.
6753       if (CondVal) {
6754         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6755                                               CGF.Builder.getInt32(1));
6756       }
6757       return NumThreads;
6758     }
6759     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6760       return CGF.Builder.getInt32(1);
6761     return DefaultThreadLimitVal;
6762   }
6763   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6764                                : CGF.Builder.getInt32(0);
6765 }
6766 
6767 /// Emit the number of threads for a target directive.  Inspect the
6768 /// thread_limit clause associated with a teams construct combined or closely
6769 /// nested with the target directive.
6770 ///
6771 /// Emit the num_threads clause for directives such as 'target parallel' that
6772 /// have no associated teams construct.
6773 ///
6774 /// Otherwise, return nullptr.
6775 static llvm::Value *
6776 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6777                                  const OMPExecutableDirective &D) {
6778   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6779          "Clauses associated with the teams directive expected to be emitted "
6780          "only for the host!");
6781   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6782   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6783          "Expected target-based executable directive.");
6784   CGBuilderTy &Bld = CGF.Builder;
6785   llvm::Value *ThreadLimitVal = nullptr;
6786   llvm::Value *NumThreadsVal = nullptr;
6787   switch (DirectiveKind) {
6788   case OMPD_target: {
6789     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6790     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6791       return NumThreads;
6792     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6793         CGF.getContext(), CS->getCapturedStmt());
6794     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6795       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6796         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6797         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6798         const auto *ThreadLimitClause =
6799             Dir->getSingleClause<OMPThreadLimitClause>();
6800         CodeGenFunction::LexicalScope Scope(
6801             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6802         if (const auto *PreInit =
6803                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6804           for (const auto *I : PreInit->decls()) {
6805             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6806               CGF.EmitVarDecl(cast<VarDecl>(*I));
6807             } else {
6808               CodeGenFunction::AutoVarEmission Emission =
6809                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6810               CGF.EmitAutoVarCleanups(Emission);
6811             }
6812           }
6813         }
6814         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6815             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6816         ThreadLimitVal =
6817             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6818       }
6819       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6820           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6821         CS = Dir->getInnermostCapturedStmt();
6822         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6823             CGF.getContext(), CS->getCapturedStmt());
6824         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6825       }
6826       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6827           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6828         CS = Dir->getInnermostCapturedStmt();
6829         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6830           return NumThreads;
6831       }
6832       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6833         return Bld.getInt32(1);
6834     }
6835     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6836   }
6837   case OMPD_target_teams: {
6838     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6839       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6840       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6841       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6842           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6843       ThreadLimitVal =
6844           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6845     }
6846     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6847     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6848       return NumThreads;
6849     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6850         CGF.getContext(), CS->getCapturedStmt());
6851     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6852       if (Dir->getDirectiveKind() == OMPD_distribute) {
6853         CS = Dir->getInnermostCapturedStmt();
6854         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6855           return NumThreads;
6856       }
6857     }
6858     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6859   }
6860   case OMPD_target_teams_distribute:
6861     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6862       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6863       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6864       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6865           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6866       ThreadLimitVal =
6867           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6868     }
6869     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6870   case OMPD_target_parallel:
6871   case OMPD_target_parallel_for:
6872   case OMPD_target_parallel_for_simd:
6873   case OMPD_target_teams_distribute_parallel_for:
6874   case OMPD_target_teams_distribute_parallel_for_simd: {
6875     llvm::Value *CondVal = nullptr;
6876     // Handle if clause. If if clause present, the number of threads is
6877     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6878     if (D.hasClausesOfKind<OMPIfClause>()) {
6879       const OMPIfClause *IfClause = nullptr;
6880       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6881         if (C->getNameModifier() == OMPD_unknown ||
6882             C->getNameModifier() == OMPD_parallel) {
6883           IfClause = C;
6884           break;
6885         }
6886       }
6887       if (IfClause) {
6888         const Expr *Cond = IfClause->getCondition();
6889         bool Result;
6890         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6891           if (!Result)
6892             return Bld.getInt32(1);
6893         } else {
6894           CodeGenFunction::RunCleanupsScope Scope(CGF);
6895           CondVal = CGF.EvaluateExprAsBool(Cond);
6896         }
6897       }
6898     }
6899     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6900       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6901       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6902       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6903           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6904       ThreadLimitVal =
6905           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6906     }
6907     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6908       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6909       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6910       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6911           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6912       NumThreadsVal =
6913           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6914       ThreadLimitVal = ThreadLimitVal
6915                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6916                                                                 ThreadLimitVal),
6917                                               NumThreadsVal, ThreadLimitVal)
6918                            : NumThreadsVal;
6919     }
6920     if (!ThreadLimitVal)
6921       ThreadLimitVal = Bld.getInt32(0);
6922     if (CondVal)
6923       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6924     return ThreadLimitVal;
6925   }
6926   case OMPD_target_teams_distribute_simd:
6927   case OMPD_target_simd:
6928     return Bld.getInt32(1);
6929   case OMPD_parallel:
6930   case OMPD_for:
6931   case OMPD_parallel_for:
6932   case OMPD_parallel_master:
6933   case OMPD_parallel_sections:
6934   case OMPD_for_simd:
6935   case OMPD_parallel_for_simd:
6936   case OMPD_cancel:
6937   case OMPD_cancellation_point:
6938   case OMPD_ordered:
6939   case OMPD_threadprivate:
6940   case OMPD_allocate:
6941   case OMPD_task:
6942   case OMPD_simd:
6943   case OMPD_sections:
6944   case OMPD_section:
6945   case OMPD_single:
6946   case OMPD_master:
6947   case OMPD_critical:
6948   case OMPD_taskyield:
6949   case OMPD_barrier:
6950   case OMPD_taskwait:
6951   case OMPD_taskgroup:
6952   case OMPD_atomic:
6953   case OMPD_flush:
6954   case OMPD_depobj:
6955   case OMPD_scan:
6956   case OMPD_teams:
6957   case OMPD_target_data:
6958   case OMPD_target_exit_data:
6959   case OMPD_target_enter_data:
6960   case OMPD_distribute:
6961   case OMPD_distribute_simd:
6962   case OMPD_distribute_parallel_for:
6963   case OMPD_distribute_parallel_for_simd:
6964   case OMPD_teams_distribute:
6965   case OMPD_teams_distribute_simd:
6966   case OMPD_teams_distribute_parallel_for:
6967   case OMPD_teams_distribute_parallel_for_simd:
6968   case OMPD_target_update:
6969   case OMPD_declare_simd:
6970   case OMPD_declare_variant:
6971   case OMPD_begin_declare_variant:
6972   case OMPD_end_declare_variant:
6973   case OMPD_declare_target:
6974   case OMPD_end_declare_target:
6975   case OMPD_declare_reduction:
6976   case OMPD_declare_mapper:
6977   case OMPD_taskloop:
6978   case OMPD_taskloop_simd:
6979   case OMPD_master_taskloop:
6980   case OMPD_master_taskloop_simd:
6981   case OMPD_parallel_master_taskloop:
6982   case OMPD_parallel_master_taskloop_simd:
6983   case OMPD_requires:
6984   case OMPD_unknown:
6985     break;
6986   default:
6987     break;
6988   }
6989   llvm_unreachable("Unsupported directive kind.");
6990 }
6991 
6992 namespace {
6993 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6994 
6995 // Utility to handle information from clauses associated with a given
6996 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6997 // It provides a convenient interface to obtain the information and generate
6998 // code for that information.
6999 class MappableExprsHandler {
7000 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumeration is marked as an LLVM bitmask enum, so the
  /// flags can be combined with the bitwise operators.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // 0x800 is reserved for compatibility with XLC.

    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7048 
7049   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7050   static unsigned getFlagMemberOffset() {
7051     unsigned Offset = 0;
7052     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7053          Remain = Remain >> 1)
7054       Offset++;
7055     return Offset;
7056   }
7057 
7058   /// Class that associates information with a base pointer to be passed to the
7059   /// runtime library.
7060   class BasePointerInfo {
7061     /// The base pointer.
7062     llvm::Value *Ptr = nullptr;
7063     /// The base declaration that refers to this device pointer, or null if
7064     /// there is none.
7065     const ValueDecl *DevPtrDecl = nullptr;
7066 
7067   public:
7068     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7069         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7070     llvm::Value *operator*() const { return Ptr; }
7071     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7072     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7073   };
7074 
  /// List of base pointers, each wrapped in a BasePointerInfo so it can carry
  /// an optional device pointer declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of emitted values; used for the Pointers and Sizes arrays of
  /// MapCombinedInfoTy and as the element type of MapNonContiguousArrayTy.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of mapping flag words, one OpenMPOffloadMappingFlags per entry.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// List of user-defined mapper declarations; entries may be null.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// List of integer values; used for StructNonContiguousInfo::Dims.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// List of value lists; used for the Offsets, Counts and Strides members of
  /// StructNonContiguousInfo.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7081 
7082   /// This structure contains combined information generated for mappable
7083   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7084   /// mappers, and non-contiguous information.
7085   struct MapCombinedInfoTy {
7086     struct StructNonContiguousInfo {
7087       bool IsNonContiguous = false;
7088       MapDimArrayTy Dims;
7089       MapNonContiguousArrayTy Offsets;
7090       MapNonContiguousArrayTy Counts;
7091       MapNonContiguousArrayTy Strides;
7092     };
7093     MapBaseValuesArrayTy BasePointers;
7094     MapValuesArrayTy Pointers;
7095     MapValuesArrayTy Sizes;
7096     MapFlagsArrayTy Types;
7097     MapMappersArrayTy Mappers;
7098     StructNonContiguousInfo NonContigInfo;
7099 
7100     /// Append arrays in \a CurInfo.
7101     void append(MapCombinedInfoTy &CurInfo) {
7102       BasePointers.append(CurInfo.BasePointers.begin(),
7103                           CurInfo.BasePointers.end());
7104       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7105       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7106       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7107       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7108       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7109                                  CurInfo.NonContigInfo.Dims.end());
7110       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7111                                     CurInfo.NonContigInfo.Offsets.end());
7112       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7113                                    CurInfo.NonContigInfo.Counts.end());
7114       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7115                                     CurInfo.NonContigInfo.Strides.end());
7116     }
7117   };
7118 
7119   /// Map between a struct and the its lowest & highest elements which have been
7120   /// mapped.
7121   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7122   ///                    HE(FieldIndex, Pointer)}
7123   struct StructRangeInfoTy {
7124     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7125         0, Address::invalid()};
7126     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7127         0, Address::invalid()};
7128     Address Base = Address::invalid();
7129   };
7130 
7131 private:
7132   /// Kind that defines how a device pointer has to be returned.
7133   struct MapInfo {
7134     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7135     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7136     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7137     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7138     bool ReturnDevicePointer = false;
7139     bool IsImplicit = false;
7140     const ValueDecl *Mapper = nullptr;
7141     bool ForDeviceAddr = false;
7142 
7143     MapInfo() = default;
7144     MapInfo(
7145         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7146         OpenMPMapClauseKind MapType,
7147         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7148         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7149         bool ReturnDevicePointer, bool IsImplicit,
7150         const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false)
7151         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7152           MotionModifiers(MotionModifiers),
7153           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7154           Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
7155   };
7156 
7157   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7158   /// member and there is no map information about it, then emission of that
7159   /// entry is deferred until the whole struct has been processed.
7160   struct DeferredDevicePtrEntryTy {
7161     const Expr *IE = nullptr;
7162     const ValueDecl *VD = nullptr;
7163     bool ForDeviceAddr = false;
7164 
7165     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7166                              bool ForDeviceAddr)
7167         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7168   };
7169 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7172   llvm::PointerUnion<const OMPExecutableDirective *,
7173                      const OMPDeclareMapperDecl *>
7174       CurDir;
7175 
7176   /// Function the directive is being generated for.
7177   CodeGenFunction &CGF;
7178 
7179   /// Set of all first private variables in the current directive.
7180   /// bool data is set to true if the variable is implicitly marked as
7181   /// firstprivate, false otherwise.
7182   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7183 
7184   /// Map between device pointer declarations and their expression components.
7185   /// The key value for declarations in 'this' is null.
7186   llvm::DenseMap<
7187       const ValueDecl *,
7188       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7189       DevPointersMap;
7190 
7191   llvm::Value *getExprTypeSize(const Expr *E) const {
7192     QualType ExprTy = E->getType().getCanonicalType();
7193 
7194     // Calculate the size for array shaping expression.
7195     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7196       llvm::Value *Size =
7197           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7198       for (const Expr *SE : OAE->getDimensions()) {
7199         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7200         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7201                                       CGF.getContext().getSizeType(),
7202                                       SE->getExprLoc());
7203         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7204       }
7205       return Size;
7206     }
7207 
7208     // Reference types are ignored for mapping purposes.
7209     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7210       ExprTy = RefTy->getPointeeType().getCanonicalType();
7211 
7212     // Given that an array section is considered a built-in type, we need to
7213     // do the calculation based on the length of the section instead of relying
7214     // on CGF.getTypeSize(E->getType()).
7215     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7216       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7217                             OAE->getBase()->IgnoreParenImpCasts())
7218                             .getCanonicalType();
7219 
7220       // If there is no length associated with the expression and lower bound is
7221       // not specified too, that means we are using the whole length of the
7222       // base.
7223       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7224           !OAE->getLowerBound())
7225         return CGF.getTypeSize(BaseTy);
7226 
7227       llvm::Value *ElemSize;
7228       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7229         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7230       } else {
7231         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7232         assert(ATy && "Expecting array type if not a pointer type.");
7233         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7234       }
7235 
7236       // If we don't have a length at this point, that is because we have an
7237       // array section with a single element.
7238       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7239         return ElemSize;
7240 
7241       if (const Expr *LenExpr = OAE->getLength()) {
7242         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7243         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7244                                              CGF.getContext().getSizeType(),
7245                                              LenExpr->getExprLoc());
7246         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7247       }
7248       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7249              OAE->getLowerBound() && "expected array_section[lb:].");
7250       // Size = sizetype - lb * elemtype;
7251       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7252       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7253       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7254                                        CGF.getContext().getSizeType(),
7255                                        OAE->getLowerBound()->getExprLoc());
7256       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7257       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7258       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7259       LengthVal = CGF.Builder.CreateSelect(
7260           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7261       return LengthVal;
7262     }
7263     return CGF.getTypeSize(ExprTy);
7264   }
7265 
7266   /// Return the corresponding bits for a given map clause modifier. Add
7267   /// a flag marking the map as a pointer if requested. Add a flag marking the
7268   /// map as the first one of a series of maps that relate to the same map
7269   /// expression.
7270   OpenMPOffloadMappingFlags getMapTypeBits(
7271       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7272       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7273       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7274     OpenMPOffloadMappingFlags Bits =
7275         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7276     switch (MapType) {
7277     case OMPC_MAP_alloc:
7278     case OMPC_MAP_release:
7279       // alloc and release is the default behavior in the runtime library,  i.e.
7280       // if we don't pass any bits alloc/release that is what the runtime is
7281       // going to do. Therefore, we don't need to signal anything for these two
7282       // type modifiers.
7283       break;
7284     case OMPC_MAP_to:
7285       Bits |= OMP_MAP_TO;
7286       break;
7287     case OMPC_MAP_from:
7288       Bits |= OMP_MAP_FROM;
7289       break;
7290     case OMPC_MAP_tofrom:
7291       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7292       break;
7293     case OMPC_MAP_delete:
7294       Bits |= OMP_MAP_DELETE;
7295       break;
7296     case OMPC_MAP_unknown:
7297       llvm_unreachable("Unexpected map type!");
7298     }
7299     if (AddPtrFlag)
7300       Bits |= OMP_MAP_PTR_AND_OBJ;
7301     if (AddIsTargetParamFlag)
7302       Bits |= OMP_MAP_TARGET_PARAM;
7303     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7304         != MapModifiers.end())
7305       Bits |= OMP_MAP_ALWAYS;
7306     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7307         != MapModifiers.end())
7308       Bits |= OMP_MAP_CLOSE;
7309     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7310         != MapModifiers.end())
7311       Bits |= OMP_MAP_PRESENT;
7312     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7313         != MotionModifiers.end())
7314       Bits |= OMP_MAP_PRESENT;
7315     if (IsNonContiguous)
7316       Bits |= OMP_MAP_NON_CONTIG;
7317     return Bits;
7318   }
7319 
7320   /// Return true if the provided expression is a final array section. A
7321   /// final array section, is one whose length can't be proved to be one.
7322   bool isFinalArraySectionExpression(const Expr *E) const {
7323     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7324 
7325     // It is not an array section and therefore not a unity-size one.
7326     if (!OASE)
7327       return false;
7328 
7329     // An array section with no colon always refer to a single element.
7330     if (OASE->getColonLocFirst().isInvalid())
7331       return false;
7332 
7333     const Expr *Length = OASE->getLength();
7334 
7335     // If we don't have a length we have to check if the array has size 1
7336     // for this dimension. Also, we should always expect a length if the
7337     // base type is pointer.
7338     if (!Length) {
7339       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7340                              OASE->getBase()->IgnoreParenImpCasts())
7341                              .getCanonicalType();
7342       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7343         return ATy->getSize().getSExtValue() != 1;
7344       // If we don't have a constant dimension length, we have to consider
7345       // the current section as having any size, so it is not necessarily
7346       // unitary. If it happen to be unity size, that's user fault.
7347       return true;
7348     }
7349 
7350     // Check if the length evaluates to 1.
7351     Expr::EvalResult Result;
7352     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7353       return true; // Can have more that size 1.
7354 
7355     llvm::APSInt ConstLength = Result.Val.getInt();
7356     return ConstLength.getSExtValue() != 1;
7357   }
7358 
7359   /// Generate the base pointers, section pointers, sizes, map type bits, and
7360   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7361   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
7363   /// components is the first associated with a capture.
7364   void generateInfoForComponentList(
7365       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7366       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7367       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7368       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7369       bool IsFirstComponentList, bool IsImplicit,
7370       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7371       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7372           OverlappedElements = llvm::None) const {
7373     // The following summarizes what has to be generated for each map and the
7374     // types below. The generated information is expressed in this order:
7375     // base pointer, section pointer, size, flags
7376     // (to add to the ones that come from the map type and modifier).
7377     //
7378     // double d;
7379     // int i[100];
7380     // float *p;
7381     //
7382     // struct S1 {
7383     //   int i;
7384     //   float f[50];
7385     // }
7386     // struct S2 {
7387     //   int i;
7388     //   float f[50];
7389     //   S1 s;
7390     //   double *p;
7391     //   struct S2 *ps;
7392     // }
7393     // S2 s;
7394     // S2 *ps;
7395     //
7396     // map(d)
7397     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7398     //
7399     // map(i)
7400     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7401     //
7402     // map(i[1:23])
7403     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7404     //
7405     // map(p)
7406     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7407     //
7408     // map(p[1:24])
7409     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7410     // in unified shared memory mode or for local pointers
7411     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7412     //
7413     // map(s)
7414     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7415     //
7416     // map(s.i)
7417     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7418     //
7419     // map(s.s.f)
7420     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7421     //
7422     // map(s.p)
7423     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7424     //
7425     // map(to: s.p[:22])
7426     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7427     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7428     // &(s.p), &(s.p[0]), 22*sizeof(double),
7429     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7430     // (*) alloc space for struct members, only this is a target parameter
7431     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7432     //      optimizes this entry out, same in the examples below)
7433     // (***) map the pointee (map: to)
7434     //
7435     // map(s.ps)
7436     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7437     //
7438     // map(from: s.ps->s.i)
7439     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7440     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7441     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7442     //
7443     // map(to: s.ps->ps)
7444     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7445     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7446     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7447     //
7448     // map(s.ps->ps->ps)
7449     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7450     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7451     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7452     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7453     //
7454     // map(to: s.ps->ps->s.f[:22])
7455     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7456     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7457     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7458     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7459     //
7460     // map(ps)
7461     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7462     //
7463     // map(ps->i)
7464     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7465     //
7466     // map(ps->s.f)
7467     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7468     //
7469     // map(from: ps->p)
7470     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7471     //
7472     // map(to: ps->p[:22])
7473     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7474     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7475     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7476     //
7477     // map(ps->ps)
7478     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7479     //
7480     // map(from: ps->ps->s.i)
7481     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7482     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7483     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7484     //
7485     // map(from: ps->ps->ps)
7486     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7487     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7488     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7489     //
7490     // map(ps->ps->ps->ps)
7491     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7492     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7493     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7494     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7495     //
7496     // map(to: ps->ps->ps->s.f[:22])
7497     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7498     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7499     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7500     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7501     //
7502     // map(to: s.f[:22]) map(from: s.p[:33])
7503     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7505     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7506     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7507     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7508     // (*) allocate contiguous space needed to fit all mapped members even if
7509     //     we allocate space for members not mapped (in this example,
7510     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7511     //     them as well because they fall between &s.f[0] and &s.p)
7512     //
7513     // map(from: s.f[:22]) map(to: ps->p[:33])
7514     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7515     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7516     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7517     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7518     // (*) the struct this entry pertains to is the 2nd element in the list of
7519     //     arguments, hence MEMBER_OF(2)
7520     //
7521     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7522     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7523     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7524     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7525     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7526     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7527     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7528     // (*) the struct this entry pertains to is the 4th element in the list
7529     //     of arguments, hence MEMBER_OF(4)
7530 
7531     // Track if the map information being generated is the first for a capture.
7532     bool IsCaptureFirstInfo = IsFirstComponentList;
7533     // When the variable is on a declare target link or in a to clause with
7534     // unified memory, a reference is needed to hold the host/device address
7535     // of the variable.
7536     bool RequiresReference = false;
7537 
7538     // Scan the components from the base to the complete expression.
7539     auto CI = Components.rbegin();
7540     auto CE = Components.rend();
7541     auto I = CI;
7542 
7543     // Track if the map information being generated is the first for a list of
7544     // components.
7545     bool IsExpressionFirstInfo = true;
7546     bool FirstPointerInComplexData = false;
7547     Address BP = Address::invalid();
7548     const Expr *AssocExpr = I->getAssociatedExpression();
7549     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7550     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7551     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7552 
7553     if (isa<MemberExpr>(AssocExpr)) {
7554       // The base is the 'this' pointer. The content of the pointer is going
7555       // to be the base of the field being mapped.
7556       BP = CGF.LoadCXXThisAddress();
7557     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7558                (OASE &&
7559                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7560       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7561     } else if (OAShE &&
7562                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7563       BP = Address(
7564           CGF.EmitScalarExpr(OAShE->getBase()),
7565           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7566     } else {
7567       // The base is the reference to the variable.
7568       // BP = &Var.
7569       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7570       if (const auto *VD =
7571               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7572         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7573                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7574           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7575               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7576                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7577             RequiresReference = true;
7578             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7579           }
7580         }
7581       }
7582 
7583       // If the variable is a pointer and is being dereferenced (i.e. is not
7584       // the last component), the base has to be the pointer itself, not its
7585       // reference. References are ignored for mapping purposes.
7586       QualType Ty =
7587           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7588       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7589         // No need to generate individual map information for the pointer, it
7590         // can be associated with the combined storage if shared memory mode is
7591         // active or the base declaration is not global variable.
7592         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7593         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7594             !VD || VD->hasLocalStorage())
7595           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7596         else
7597           FirstPointerInComplexData = true;
7598         ++I;
7599       }
7600     }
7601 
7602     // Track whether a component of the list should be marked as MEMBER_OF some
7603     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7604     // in a component list should be marked as MEMBER_OF, all subsequent entries
7605     // do not belong to the base struct. E.g.
7606     // struct S2 s;
7607     // s.ps->ps->ps->f[:]
7608     //   (1) (2) (3) (4)
7609     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7610     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7611     // is the pointee of ps(2) which is not member of struct s, so it should not
7612     // be marked as such (it is still PTR_AND_OBJ).
7613     // The variable is initialized to false so that PTR_AND_OBJ entries which
7614     // are not struct members are not considered (e.g. array of pointers to
7615     // data).
7616     bool ShouldBeMemberOf = false;
7617 
7618     // Variable keeping track of whether or not we have encountered a component
7619     // in the component list which is a member expression. Useful when we have a
7620     // pointer or a final array section, in which case it is the previous
7621     // component in the list which tells us whether we have a member expression.
7622     // E.g. X.f[:]
7623     // While processing the final array section "[:]" it is "f" which tells us
7624     // whether we are dealing with a member of a declared struct.
7625     const MemberExpr *EncounteredME = nullptr;
7626 
7627     // Track for the total number of dimension. Start from one for the dummy
7628     // dimension.
7629     uint64_t DimSize = 1;
7630 
7631     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7632 
7633     for (; I != CE; ++I) {
7634       // If the current component is member of a struct (parent struct) mark it.
7635       if (!EncounteredME) {
7636         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7637         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7638         // as MEMBER_OF the parent struct.
7639         if (EncounteredME) {
7640           ShouldBeMemberOf = true;
7641           // Do not emit as complex pointer if this is actually not array-like
7642           // expression.
7643           if (FirstPointerInComplexData) {
7644             QualType Ty = std::prev(I)
7645                               ->getAssociatedDeclaration()
7646                               ->getType()
7647                               .getNonReferenceType();
7648             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7649             FirstPointerInComplexData = false;
7650           }
7651         }
7652       }
7653 
7654       auto Next = std::next(I);
7655 
7656       // We need to generate the addresses and sizes if this is the last
7657       // component, if the component is a pointer or if it is an array section
7658       // whose length can't be proved to be one. If this is a pointer, it
7659       // becomes the base address for the following components.
7660 
7661       // A final array section, is one whose length can't be proved to be one.
7662       // If the map item is non-contiguous then we don't treat any array section
7663       // as final array section.
7664       bool IsFinalArraySection =
7665           !IsNonContiguous &&
7666           isFinalArraySectionExpression(I->getAssociatedExpression());
7667 
7668       // Get information on whether the element is a pointer. Have to do a
7669       // special treatment for array sections given that they are built-in
7670       // types.
7671       const auto *OASE =
7672           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7673       const auto *OAShE =
7674           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7675       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7676       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7677       bool IsPointer =
7678           OAShE ||
7679           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7680                        .getCanonicalType()
7681                        ->isAnyPointerType()) ||
7682           I->getAssociatedExpression()->getType()->isAnyPointerType();
7683       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7684 
7685       if (OASE)
7686         ++DimSize;
7687 
7688       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7689         // If this is not the last component, we expect the pointer to be
7690         // associated with an array expression or member expression.
7691         assert((Next == CE ||
7692                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7693                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7694                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7695                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7696                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7697                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7698                "Unexpected expression");
7699 
7700         Address LB = Address::invalid();
7701         if (OAShE) {
7702           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7703                        CGF.getContext().getTypeAlignInChars(
7704                            OAShE->getBase()->getType()));
7705         } else {
7706           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7707                    .getAddress(CGF);
7708         }
7709 
7710         // If this component is a pointer inside the base struct then we don't
7711         // need to create any entry for it - it will be combined with the object
7712         // it is pointing to into a single PTR_AND_OBJ entry.
7713         bool IsMemberPointerOrAddr =
7714             (IsPointer || ForDeviceAddr) && EncounteredME &&
7715             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7716              EncounteredME);
7717         if (!OverlappedElements.empty()) {
7718           // Handle base element with the info for overlapped elements.
7719           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7720           assert(Next == CE &&
7721                  "Expected last element for the overlapped elements.");
7722           assert(!IsPointer &&
7723                  "Unexpected base element with the pointer type.");
7724           // Mark the whole struct as the struct that requires allocation on the
7725           // device.
7726           PartialStruct.LowestElem = {0, LB};
7727           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7728               I->getAssociatedExpression()->getType());
7729           Address HB = CGF.Builder.CreateConstGEP(
7730               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7731                                                               CGF.VoidPtrTy),
7732               TypeSize.getQuantity() - 1);
7733           PartialStruct.HighestElem = {
7734               std::numeric_limits<decltype(
7735                   PartialStruct.HighestElem.first)>::max(),
7736               HB};
7737           PartialStruct.Base = BP;
7738           // Emit data for non-overlapped data.
7739           OpenMPOffloadMappingFlags Flags =
7740               OMP_MAP_MEMBER_OF |
7741               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7742                              /*AddPtrFlag=*/false,
7743                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7744           LB = BP;
7745           llvm::Value *Size = nullptr;
7746           // Do bitcopy of all non-overlapped structure elements.
7747           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7748                    Component : OverlappedElements) {
7749             Address ComponentLB = Address::invalid();
7750             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7751                  Component) {
7752               if (MC.getAssociatedDeclaration()) {
7753                 ComponentLB =
7754                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7755                         .getAddress(CGF);
7756                 Size = CGF.Builder.CreatePtrDiff(
7757                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7758                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7759                 break;
7760               }
7761             }
7762             assert(Size && "Failed to determine structure size");
7763             CombinedInfo.BasePointers.push_back(BP.getPointer());
7764             CombinedInfo.Pointers.push_back(LB.getPointer());
7765             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7766                 Size, CGF.Int64Ty, /*isSigned=*/true));
7767             CombinedInfo.Types.push_back(Flags);
7768             CombinedInfo.Mappers.push_back(nullptr);
7769             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7770                                                                       : 1);
7771             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7772           }
7773           CombinedInfo.BasePointers.push_back(BP.getPointer());
7774           CombinedInfo.Pointers.push_back(LB.getPointer());
7775           Size = CGF.Builder.CreatePtrDiff(
7776               CGF.EmitCastToVoidPtr(
7777                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7778               CGF.EmitCastToVoidPtr(LB.getPointer()));
7779           CombinedInfo.Sizes.push_back(
7780               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7781           CombinedInfo.Types.push_back(Flags);
7782           CombinedInfo.Mappers.push_back(nullptr);
7783           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7784                                                                     : 1);
7785           break;
7786         }
7787         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7788         if (!IsMemberPointerOrAddr) {
7789           CombinedInfo.BasePointers.push_back(BP.getPointer());
7790           CombinedInfo.Pointers.push_back(LB.getPointer());
7791           CombinedInfo.Sizes.push_back(
7792               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7793           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7794                                                                     : 1);
7795 
7796           // If Mapper is valid, the last component inherits the mapper.
7797           bool HasMapper = Mapper && Next == CE;
7798           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7799 
7800           // We need to add a pointer flag for each map that comes from the
7801           // same expression except for the first one. We also need to signal
7802           // this map is the first one that relates with the current capture
7803           // (there is a set of entries for each capture).
7804           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7805               MapType, MapModifiers, MotionModifiers, IsImplicit,
7806               !IsExpressionFirstInfo || RequiresReference ||
7807                   FirstPointerInComplexData,
7808               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7809 
7810           if (!IsExpressionFirstInfo) {
7811             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7812             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7813             if (IsPointer)
7814               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7815                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7816 
7817             if (ShouldBeMemberOf) {
7818               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7819               // should be later updated with the correct value of MEMBER_OF.
7820               Flags |= OMP_MAP_MEMBER_OF;
7821               // From now on, all subsequent PTR_AND_OBJ entries should not be
7822               // marked as MEMBER_OF.
7823               ShouldBeMemberOf = false;
7824             }
7825           }
7826 
7827           CombinedInfo.Types.push_back(Flags);
7828         }
7829 
7830         // If we have encountered a member expression so far, keep track of the
7831         // mapped member. If the parent is "*this", then the value declaration
7832         // is nullptr.
7833         if (EncounteredME) {
7834           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7835           unsigned FieldIndex = FD->getFieldIndex();
7836 
7837           // Update info about the lowest and highest elements for this struct
7838           if (!PartialStruct.Base.isValid()) {
7839             PartialStruct.LowestElem = {FieldIndex, LB};
7840             if (IsFinalArraySection) {
7841               Address HB =
7842                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7843                       .getAddress(CGF);
7844               PartialStruct.HighestElem = {FieldIndex, HB};
7845             } else {
7846               PartialStruct.HighestElem = {FieldIndex, LB};
7847             }
7848             PartialStruct.Base = BP;
7849           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7850             PartialStruct.LowestElem = {FieldIndex, LB};
7851           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7852             PartialStruct.HighestElem = {FieldIndex, LB};
7853           }
7854         }
7855 
7856         // If we have a final array section, we are done with this expression.
7857         if (IsFinalArraySection)
7858           break;
7859 
7860         // The pointer becomes the base for the next element.
7861         if (Next != CE)
7862           BP = LB;
7863 
7864         IsExpressionFirstInfo = false;
7865         IsCaptureFirstInfo = false;
7866         FirstPointerInComplexData = false;
7867       }
7868     }
7869 
7870     if (!IsNonContiguous)
7871       return;
7872 
7873     const ASTContext &Context = CGF.getContext();
7874 
7875     // For supporting stride in array section, we need to initialize the first
7876     // dimension size as 1, first offset as 0, and first count as 1
7877     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7878     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7879     MapValuesArrayTy CurStrides;
7880     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7881     uint64_t ElementTypeSize;
7882 
7883     // Collect Size information for each dimension and get the element size as
7884     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7886     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7887          Components) {
7888       const Expr *AssocExpr = Component.getAssociatedExpression();
7889       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7890 
7891       if (!OASE)
7892         continue;
7893 
7894       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7895       auto *CAT = Context.getAsConstantArrayType(Ty);
7896       auto *VAT = Context.getAsVariableArrayType(Ty);
7897 
7898       // We need all the dimension size except for the last dimension.
7899       assert((VAT || CAT || &Component == &*Components.begin()) &&
7900              "Should be either ConstantArray or VariableArray if not the "
7901              "first Component");
7902 
7903       // Get element size if CurStrides is empty.
7904       if (CurStrides.empty()) {
7905         const Type *ElementType = nullptr;
7906         if (CAT)
7907           ElementType = CAT->getElementType().getTypePtr();
7908         else if (VAT)
7909           ElementType = VAT->getElementType().getTypePtr();
7910         else
7911           assert(&Component == &*Components.begin() &&
7912                  "Only expect pointer (non CAT or VAT) when this is the "
7913                  "first Component");
7914         // If ElementType is null, then it means the base is a pointer
7915         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7916         // for next iteration.
7917         if (ElementType) {
7918           // For the case that having pointer as base, we need to remove one
7919           // level of indirection.
7920           if (&Component != &*Components.begin())
7921             ElementType = ElementType->getPointeeOrArrayElementType();
7922           ElementTypeSize =
7923               Context.getTypeSizeInChars(ElementType).getQuantity();
7924           CurStrides.push_back(
7925               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7926         }
7927       }
7928       // Get dimension value except for the last dimension since we don't need
7929       // it.
7930       if (DimSizes.size() < Components.size() - 1) {
7931         if (CAT)
7932           DimSizes.push_back(llvm::ConstantInt::get(
7933               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7934         else if (VAT)
7935           DimSizes.push_back(CGF.Builder.CreateIntCast(
7936               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7937               /*IsSigned=*/false));
7938       }
7939     }
7940 
    // Skip the dummy dimension since we already have its information.
7942     auto DI = DimSizes.begin() + 1;
7943     // Product of dimension.
7944     llvm::Value *DimProd =
7945         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7946 
7947     // Collect info for non-contiguous. Notice that offset, count, and stride
7948     // are only meaningful for array-section, so we insert a null for anything
7949     // other than array-section.
7950     // Also, the size of offset, count, and stride are not the same as
7951     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7952     // count, and stride are the same as the number of non-contiguous
7953     // declaration in target update to/from clause.
7954     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7955          Components) {
7956       const Expr *AssocExpr = Component.getAssociatedExpression();
7957 
7958       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7959         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7960             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7961             /*isSigned=*/false);
7962         CurOffsets.push_back(Offset);
7963         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7964         CurStrides.push_back(CurStrides.back());
7965         continue;
7966       }
7967 
7968       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7969 
7970       if (!OASE)
7971         continue;
7972 
7973       // Offset
7974       const Expr *OffsetExpr = OASE->getLowerBound();
7975       llvm::Value *Offset = nullptr;
7976       if (!OffsetExpr) {
7977         // If offset is absent, then we just set it to zero.
7978         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7979       } else {
7980         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7981                                            CGF.Int64Ty,
7982                                            /*isSigned=*/false);
7983       }
7984       CurOffsets.push_back(Offset);
7985 
7986       // Count
7987       const Expr *CountExpr = OASE->getLength();
7988       llvm::Value *Count = nullptr;
7989       if (!CountExpr) {
7990         // In Clang, once a high dimension is an array section, we construct all
7991         // the lower dimension as array section, however, for case like
7992         // arr[0:2][2], Clang construct the inner dimension as an array section
7993         // but it actually is not in an array section form according to spec.
7994         if (!OASE->getColonLocFirst().isValid() &&
7995             !OASE->getColonLocSecond().isValid()) {
7996           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7997         } else {
7998           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7999           // When the length is absent it defaults to ⌈(size −
8000           // lower-bound)/stride⌉, where size is the size of the array
8001           // dimension.
8002           const Expr *StrideExpr = OASE->getStride();
8003           llvm::Value *Stride =
8004               StrideExpr
8005                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8006                                               CGF.Int64Ty, /*isSigned=*/false)
8007                   : nullptr;
8008           if (Stride)
8009             Count = CGF.Builder.CreateUDiv(
8010                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8011           else
8012             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8013         }
8014       } else {
8015         Count = CGF.EmitScalarExpr(CountExpr);
8016       }
8017       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8018       CurCounts.push_back(Count);
8019 
8020       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8021       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8022       //              Offset      Count     Stride
8023       //    D0          0           1         4    (int)    <- dummy dimension
8024       //    D1          0           2         8    (2 * (1) * 4)
8025       //    D2          1           2         20   (1 * (1 * 5) * 4)
8026       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8027       const Expr *StrideExpr = OASE->getStride();
8028       llvm::Value *Stride =
8029           StrideExpr
8030               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8031                                           CGF.Int64Ty, /*isSigned=*/false)
8032               : nullptr;
8033       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8034       if (Stride)
8035         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8036       else
8037         CurStrides.push_back(DimProd);
8038       if (DI != DimSizes.end())
8039         ++DI;
8040     }
8041 
8042     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8043     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8044     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8045   }
8046 
8047   /// Return the adjusted map modifiers if the declaration a capture refers to
8048   /// appears in a first-private clause. This is expected to be used only with
8049   /// directives that start with 'target'.
8050   MappableExprsHandler::OpenMPOffloadMappingFlags
8051   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8052     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8053 
8054     // A first private variable captured by reference will use only the
8055     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8056     // declaration is known as first-private in this handler.
8057     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8058       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8059           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8060         return MappableExprsHandler::OMP_MAP_ALWAYS |
8061                MappableExprsHandler::OMP_MAP_TO;
8062       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8063         return MappableExprsHandler::OMP_MAP_TO |
8064                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8065       return MappableExprsHandler::OMP_MAP_PRIVATE |
8066              MappableExprsHandler::OMP_MAP_TO;
8067     }
8068     return MappableExprsHandler::OMP_MAP_TO |
8069            MappableExprsHandler::OMP_MAP_FROM;
8070   }
8071 
8072   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8073     // Rotate by getFlagMemberOffset() bits.
8074     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8075                                                   << getFlagMemberOffset());
8076   }
8077 
8078   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8079                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8080     // If the entry is PTR_AND_OBJ but has not been marked with the special
8081     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8082     // marked as MEMBER_OF.
8083     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8084         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8085       return;
8086 
8087     // Reset the placeholder value to prepare the flag for the assignment of the
8088     // proper MEMBER_OF value.
8089     Flags &= ~OMP_MAP_MEMBER_OF;
8090     Flags |= MemberOfFlag;
8091   }
8092 
8093   void getPlainLayout(const CXXRecordDecl *RD,
8094                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8095                       bool AsBase) const {
8096     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8097 
8098     llvm::StructType *St =
8099         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8100 
8101     unsigned NumElements = St->getNumElements();
8102     llvm::SmallVector<
8103         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8104         RecordLayout(NumElements);
8105 
8106     // Fill bases.
8107     for (const auto &I : RD->bases()) {
8108       if (I.isVirtual())
8109         continue;
8110       const auto *Base = I.getType()->getAsCXXRecordDecl();
8111       // Ignore empty bases.
8112       if (Base->isEmpty() || CGF.getContext()
8113                                  .getASTRecordLayout(Base)
8114                                  .getNonVirtualSize()
8115                                  .isZero())
8116         continue;
8117 
8118       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8119       RecordLayout[FieldIndex] = Base;
8120     }
8121     // Fill in virtual bases.
8122     for (const auto &I : RD->vbases()) {
8123       const auto *Base = I.getType()->getAsCXXRecordDecl();
8124       // Ignore empty bases.
8125       if (Base->isEmpty())
8126         continue;
8127       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8128       if (RecordLayout[FieldIndex])
8129         continue;
8130       RecordLayout[FieldIndex] = Base;
8131     }
8132     // Fill in all the fields.
8133     assert(!RD->isUnion() && "Unexpected union.");
8134     for (const auto *Field : RD->fields()) {
8135       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8136       // will fill in later.)
8137       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8138         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8139         RecordLayout[FieldIndex] = Field;
8140       }
8141     }
8142     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8143              &Data : RecordLayout) {
8144       if (Data.isNull())
8145         continue;
8146       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8147         getPlainLayout(Base, Layout, /*AsBase=*/true);
8148       else
8149         Layout.push_back(Data.get<const FieldDecl *>());
8150     }
8151   }
8152 
8153 public:
8154   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8155       : CurDir(&Dir), CGF(CGF) {
8156     // Extract firstprivate clause information.
8157     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8158       for (const auto *D : C->varlists())
8159         FirstPrivateDecls.try_emplace(
8160             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8161     // Extract implicit firstprivates from uses_allocators clauses.
8162     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8163       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8164         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8165         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8166           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8167                                         /*Implicit=*/true);
8168         else if (const auto *VD = dyn_cast<VarDecl>(
8169                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8170                          ->getDecl()))
8171           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8172       }
8173     }
8174     // Extract device pointer clause information.
8175     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8176       for (auto L : C->component_lists())
8177         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8178   }
8179 
8180   /// Constructor for the declare mapper directive.
8181   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8182       : CurDir(&Dir), CGF(CGF) {}
8183 
8184   /// Generate code for the combined entry if we have a partially mapped struct
8185   /// and take care of the mapping flags of the arguments corresponding to
8186   /// individual struct members.
8187   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8188                          MapFlagsArrayTy &CurTypes,
8189                          const StructRangeInfoTy &PartialStruct,
8190                          bool NotTargetParams = false) const {
8191     // Base is the base of the struct
8192     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8193     // Pointer is the address of the lowest element
8194     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8195     CombinedInfo.Pointers.push_back(LB);
8196     // There should not be a mapper for a combined entry.
8197     CombinedInfo.Mappers.push_back(nullptr);
8198     // Size is (addr of {highest+1} element) - (addr of lowest element)
8199     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8200     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8201     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8202     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8203     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8204     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8205                                                   /*isSigned=*/false);
8206     CombinedInfo.Sizes.push_back(Size);
8207     // Map type is always TARGET_PARAM, if generate info for captures.
8208     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8209                                                  : OMP_MAP_TARGET_PARAM);
8210     // If any element has the present modifier, then make sure the runtime
8211     // doesn't attempt to allocate the struct.
8212     if (CurTypes.end() !=
8213         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8214           return Type & OMP_MAP_PRESENT;
8215         }))
8216       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8217     // Remove TARGET_PARAM flag from the first element
8218     CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;
8219 
8220     // All other current entries will be MEMBER_OF the combined entry
8221     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8222     // 0xFFFF in the MEMBER_OF field).
8223     OpenMPOffloadMappingFlags MemberOfFlag =
8224         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8225     for (auto &M : CurTypes)
8226       setCorrectMemberOfFlag(M, MemberOfFlag);
8227   }
8228 
8229   /// Generate all the base pointers, section pointers, sizes, map types, and
8230   /// mappers for the extracted mappable expressions (all included in \a
8231   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8232   /// pair of the relevant declaration and index where it occurs is appended to
8233   /// the device pointers info array.
8234   void generateAllInfo(
8235       MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
8236       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8237           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8238     // We have to process the component lists that relate with the same
8239     // declaration in a single chunk so that we can generate the map flags
8240     // correctly. Therefore, we organize all lists in a map.
8241     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8242 
8243     // Helper function to fill the information map for the different supported
8244     // clauses.
8245     auto &&InfoGen =
8246         [&Info, &SkipVarSet](
8247             const ValueDecl *D,
8248             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8249             OpenMPMapClauseKind MapType,
8250             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8251             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8252             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8253             bool ForDeviceAddr = false) {
8254           const ValueDecl *VD =
8255               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8256           if (SkipVarSet.count(VD))
8257             return;
8258           Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
8259                                 ReturnDevicePointer, IsImplicit, Mapper,
8260                                 ForDeviceAddr);
8261         };
8262 
8263     assert(CurDir.is<const OMPExecutableDirective *>() &&
8264            "Expect a executable directive");
8265     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8266     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
8267       for (const auto L : C->component_lists()) {
8268         InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
8269                 C->getMapTypeModifiers(), llvm::None,
8270                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
8271       }
8272     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
8273       for (const auto L : C->component_lists()) {
8274         InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
8275                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8276                 C->isImplicit(), std::get<2>(L));
8277       }
8278     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
8279       for (const auto L : C->component_lists()) {
8280         InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
8281                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8282                 C->isImplicit(), std::get<2>(L));
8283       }
8284 
8285     // Look at the use_device_ptr clause information and mark the existing map
8286     // entries as such. If there is no map information for an entry in the
8287     // use_device_ptr list, we create one with map type 'alloc' and zero size
8288     // section. It is the user fault if that was not mapped before. If there is
8289     // no map information and the pointer is a struct member, then we defer the
8290     // emission of that entry until the whole struct has been processed.
8291     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8292         DeferredInfo;
8293     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8294 
8295     for (const auto *C :
8296          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8297       for (const auto L : C->component_lists()) {
8298         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8299             std::get<1>(L);
8300         assert(!Components.empty() &&
8301                "Not expecting empty list of components!");
8302         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8303         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8304         const Expr *IE = Components.back().getAssociatedExpression();
8305         // If the first component is a member expression, we have to look into
8306         // 'this', which maps to null in the map of map information. Otherwise
8307         // look directly for the information.
8308         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8309 
8310         // We potentially have map information for this declaration already.
8311         // Look for the first set of components that refer to it.
8312         if (It != Info.end()) {
8313           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8314             return MI.Components.back().getAssociatedDeclaration() == VD;
8315           });
8316           // If we found a map entry, signal that the pointer has to be returned
8317           // and move on to the next declaration.
8318           // Exclude cases where the base pointer is mapped as array subscript,
8319           // array section or array shaping. The base address is passed as a
8320           // pointer to base in this case and cannot be used as a base for
8321           // use_device_ptr list item.
8322           if (CI != It->second.end()) {
8323             auto PrevCI = std::next(CI->Components.rbegin());
8324             const auto *VarD = dyn_cast<VarDecl>(VD);
8325             if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8326                 isa<MemberExpr>(IE) ||
8327                 !VD->getType().getNonReferenceType()->isPointerType() ||
8328                 PrevCI == CI->Components.rend() ||
8329                 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8330                 VarD->hasLocalStorage()) {
8331               CI->ReturnDevicePointer = true;
8332               continue;
8333             }
8334           }
8335         }
8336 
8337         // We didn't find any match in our map information - generate a zero
8338         // size array section - if the pointer is a struct member we defer this
8339         // action until the whole struct has been processed.
8340         if (isa<MemberExpr>(IE)) {
8341           // Insert the pointer into Info to be processed by
8342           // generateInfoForComponentList. Because it is a member pointer
8343           // without a pointee, no entry will be generated for it, therefore
8344           // we need to generate one after the whole struct has been processed.
8345           // Nonetheless, generateInfoForComponentList must be called to take
8346           // the pointer into account for the calculation of the range of the
8347           // partial struct.
8348           InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
8349                   /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
8350           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8351         } else {
8352           llvm::Value *Ptr =
8353               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8354           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8355           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8356           UseDevicePtrCombinedInfo.Sizes.push_back(
8357               llvm::Constant::getNullValue(CGF.Int64Ty));
8358           UseDevicePtrCombinedInfo.Types.push_back(
8359               OMP_MAP_RETURN_PARAM |
8360               (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
8361           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8362         }
8363       }
8364     }
8365 
8366     // Look at the use_device_addr clause information and mark the existing map
8367     // entries as such. If there is no map information for an entry in the
8368     // use_device_addr list, we create one with map type 'alloc' and zero size
8369     // section. It is the user fault if that was not mapped before. If there is
8370     // no map information and the pointer is a struct member, then we defer the
8371     // emission of that entry until the whole struct has been processed.
8372     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8373     for (const auto *C :
8374          CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8375       for (const auto L : C->component_lists()) {
8376         assert(!std::get<1>(L).empty() &&
8377                "Not expecting empty list of components!");
8378         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8379         if (!Processed.insert(VD).second)
8380           continue;
8381         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8382         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8383         // If the first component is a member expression, we have to look into
8384         // 'this', which maps to null in the map of map information. Otherwise
8385         // look directly for the information.
8386         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8387 
8388         // We potentially have map information for this declaration already.
8389         // Look for the first set of components that refer to it.
8390         if (It != Info.end()) {
8391           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8392             return MI.Components.back().getAssociatedDeclaration() == VD;
8393           });
8394           // If we found a map entry, signal that the pointer has to be returned
8395           // and move on to the next declaration.
8396           if (CI != It->second.end()) {
8397             CI->ReturnDevicePointer = true;
8398             continue;
8399           }
8400         }
8401 
8402         // We didn't find any match in our map information - generate a zero
8403         // size array section - if the pointer is a struct member we defer this
8404         // action until the whole struct has been processed.
8405         if (isa<MemberExpr>(IE)) {
8406           // Insert the pointer into Info to be processed by
8407           // generateInfoForComponentList. Because it is a member pointer
8408           // without a pointee, no entry will be generated for it, therefore
8409           // we need to generate one after the whole struct has been processed.
8410           // Nonetheless, generateInfoForComponentList must be called to take
8411           // the pointer into account for the calculation of the range of the
8412           // partial struct.
8413           InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8414                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8415                   nullptr, /*ForDeviceAddr=*/true);
8416           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8417         } else {
8418           llvm::Value *Ptr;
8419           if (IE->isGLValue())
8420             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8421           else
8422             Ptr = CGF.EmitScalarExpr(IE);
8423           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8424           CombinedInfo.Pointers.push_back(Ptr);
8425           CombinedInfo.Sizes.push_back(
8426               llvm::Constant::getNullValue(CGF.Int64Ty));
8427           CombinedInfo.Types.push_back(
8428               OMP_MAP_RETURN_PARAM |
8429               (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
8430           CombinedInfo.Mappers.push_back(nullptr);
8431         }
8432       }
8433     }
8434 
8435     for (const auto &M : Info) {
8436       // We need to know when we generate information for the first component
8437       // associated with a capture, because the mapping flags depend on it.
8438       bool IsFirstComponentList = !NotTargetParams;
8439 
8440       // Temporary generated information.
8441       MapCombinedInfoTy CurInfo;
8442       StructRangeInfoTy PartialStruct;
8443 
8444       for (const MapInfo &L : M.second) {
8445         assert(!L.Components.empty() &&
8446                "Not expecting declaration with no component lists.");
8447 
8448         // Remember the current base pointer index.
8449         unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8450         CurInfo.NonContigInfo.IsNonContiguous =
8451             L.Components.back().isNonContiguous();
8452         generateInfoForComponentList(L.MapType, L.MapModifiers,
8453                                      L.MotionModifiers, L.Components, CurInfo,
8454                                      PartialStruct, IsFirstComponentList,
8455                                      L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8456 
8457         // If this entry relates with a device pointer, set the relevant
8458         // declaration and add the 'return pointer' flag.
8459         if (L.ReturnDevicePointer) {
8460           assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8461                  "Unexpected number of mapped base pointers.");
8462 
8463           const ValueDecl *RelevantVD =
8464               L.Components.back().getAssociatedDeclaration();
8465           assert(RelevantVD &&
8466                  "No relevant declaration related with device pointer??");
8467 
8468           CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8469               RelevantVD);
8470           CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8471         }
8472         IsFirstComponentList = false;
8473       }
8474 
8475       // Append any pending zero-length pointers which are struct members and
8476       // used with use_device_ptr or use_device_addr.
8477       auto CI = DeferredInfo.find(M.first);
8478       if (CI != DeferredInfo.end()) {
8479         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8480           llvm::Value *BasePtr;
8481           llvm::Value *Ptr;
8482           if (L.ForDeviceAddr) {
8483             if (L.IE->isGLValue())
8484               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8485             else
8486               Ptr = this->CGF.EmitScalarExpr(L.IE);
8487             BasePtr = Ptr;
8488             // Entry is RETURN_PARAM. Also, set the placeholder value
8489             // MEMBER_OF=FFFF so that the entry is later updated with the
8490             // correct value of MEMBER_OF.
8491             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8492           } else {
8493             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8494             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8495                                              L.IE->getExprLoc());
8496             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8497             // value MEMBER_OF=FFFF so that the entry is later updated with the
8498             // correct value of MEMBER_OF.
8499             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8500                                     OMP_MAP_MEMBER_OF);
8501           }
8502           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8503           CurInfo.Pointers.push_back(Ptr);
8504           CurInfo.Sizes.push_back(
8505               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8506           CurInfo.Mappers.push_back(nullptr);
8507         }
8508       }
8509 
8510       // If there is an entry in PartialStruct it means we have a struct with
8511       // individual members mapped. Emit an extra combined entry.
8512       if (PartialStruct.Base.isValid())
8513         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
8514                           NotTargetParams);
8515 
8516       // We need to append the results of this capture to what we already have.
8517       CombinedInfo.append(CurInfo);
8518     }
8519     // Append data for use_device_ptr clauses.
8520     CombinedInfo.append(UseDevicePtrCombinedInfo);
8521   }
8522 
8523   /// Generate all the base pointers, section pointers, sizes, map types, and
8524   /// mappers for the extracted map clauses of user-defined mapper (all included
8525   /// in \a CombinedInfo).
8526   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8527     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8528            "Expect a declare mapper directive");
8529     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8530     // We have to process the component lists that relate with the same
8531     // declaration in a single chunk so that we can generate the map flags
8532     // correctly. Therefore, we organize all lists in a map.
8533     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8534 
8535     // Fill the information map for map clauses.
8536     for (const auto *C : CurMapperDir->clauselists()) {
8537       const auto *MC = cast<OMPMapClause>(C);
8538       for (const auto L : MC->component_lists()) {
8539         const ValueDecl *VD =
8540             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8541                            : nullptr;
8542         // Get the corresponding user-defined mapper.
8543         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8544                               MC->getMapTypeModifiers(), llvm::None,
8545                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8546                               std::get<2>(L));
8547       }
8548     }
8549 
8550     for (const auto &M : Info) {
8551       // We need to know when we generate information for the first component
8552       // associated with a capture, because the mapping flags depend on it.
8553       bool IsFirstComponentList = true;
8554 
8555       // Temporary generated information.
8556       MapCombinedInfoTy CurInfo;
8557       StructRangeInfoTy PartialStruct;
8558 
8559       for (const MapInfo &L : M.second) {
8560         assert(!L.Components.empty() &&
8561                "Not expecting declaration with no component lists.");
8562         generateInfoForComponentList(L.MapType, L.MapModifiers,
8563                                      L.MotionModifiers, L.Components, CurInfo,
8564                                      PartialStruct, IsFirstComponentList,
8565                                      L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8566         IsFirstComponentList = false;
8567       }
8568 
8569       // If there is an entry in PartialStruct it means we have a struct with
8570       // individual members mapped. Emit an extra combined entry.
8571       if (PartialStruct.Base.isValid()) {
8572         CurInfo.NonContigInfo.Dims.push_back(0);
8573         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
8574       }
8575 
8576       // We need to append the results of this capture to what we already have.
8577       CombinedInfo.append(CurInfo);
8578     }
8579   }
8580 
8581   /// Emit capture info for lambdas for variables captured by reference.
8582   void generateInfoForLambdaCaptures(
8583       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8584       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8585     const auto *RD = VD->getType()
8586                          .getCanonicalType()
8587                          .getNonReferenceType()
8588                          ->getAsCXXRecordDecl();
8589     if (!RD || !RD->isLambda())
8590       return;
8591     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8592     LValue VDLVal = CGF.MakeAddrLValue(
8593         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8594     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8595     FieldDecl *ThisCapture = nullptr;
8596     RD->getCaptureFields(Captures, ThisCapture);
8597     if (ThisCapture) {
8598       LValue ThisLVal =
8599           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8600       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8601       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8602                                  VDLVal.getPointer(CGF));
8603       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8604       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8605       CombinedInfo.Sizes.push_back(
8606           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8607                                     CGF.Int64Ty, /*isSigned=*/true));
8608       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8609                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8610       CombinedInfo.Mappers.push_back(nullptr);
8611     }
8612     for (const LambdaCapture &LC : RD->captures()) {
8613       if (!LC.capturesVariable())
8614         continue;
8615       const VarDecl *VD = LC.getCapturedVar();
8616       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8617         continue;
8618       auto It = Captures.find(VD);
8619       assert(It != Captures.end() && "Found lambda capture without field.");
8620       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8621       if (LC.getCaptureKind() == LCK_ByRef) {
8622         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8623         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8624                                    VDLVal.getPointer(CGF));
8625         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8626         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8627         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8628             CGF.getTypeSize(
8629                 VD->getType().getCanonicalType().getNonReferenceType()),
8630             CGF.Int64Ty, /*isSigned=*/true));
8631       } else {
8632         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8633         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8634                                    VDLVal.getPointer(CGF));
8635         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8636         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8637         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8638       }
8639       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8640                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8641       CombinedInfo.Mappers.push_back(nullptr);
8642     }
8643   }
8644 
8645   /// Set correct indices for lambdas captures.
8646   void adjustMemberOfForLambdaCaptures(
8647       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8648       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8649       MapFlagsArrayTy &Types) const {
8650     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8651       // Set correct member_of idx for all implicit lambda captures.
8652       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8653                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8654         continue;
8655       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8656       assert(BasePtr && "Unable to find base lambda address.");
8657       int TgtIdx = -1;
8658       for (unsigned J = I; J > 0; --J) {
8659         unsigned Idx = J - 1;
8660         if (Pointers[Idx] != BasePtr)
8661           continue;
8662         TgtIdx = Idx;
8663         break;
8664       }
8665       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8666       // All other current entries will be MEMBER_OF the combined entry
8667       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8668       // 0xFFFF in the MEMBER_OF field).
8669       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8670       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8671     }
8672   }
8673 
8674   /// Generate the base pointers, section pointers, sizes, map types, and
8675   /// mappers associated to a given capture (all included in \a CombinedInfo).
8676   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8677                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8678                               StructRangeInfoTy &PartialStruct) const {
8679     assert(!Cap->capturesVariableArrayType() &&
8680            "Not expecting to generate map info for a variable array type!");
8681 
8682     // We need to know when we generating information for the first component
8683     const ValueDecl *VD = Cap->capturesThis()
8684                               ? nullptr
8685                               : Cap->getCapturedVar()->getCanonicalDecl();
8686 
8687     // If this declaration appears in a is_device_ptr clause we just have to
8688     // pass the pointer by value. If it is a reference to a declaration, we just
8689     // pass its value.
8690     if (DevPointersMap.count(VD)) {
8691       CombinedInfo.BasePointers.emplace_back(Arg, VD);
8692       CombinedInfo.Pointers.push_back(Arg);
8693       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8694           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8695           /*isSigned=*/true));
8696       CombinedInfo.Types.push_back(
8697           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
8698           OMP_MAP_TARGET_PARAM);
8699       CombinedInfo.Mappers.push_back(nullptr);
8700       return;
8701     }
8702 
8703     using MapData =
8704         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8705                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8706                    const ValueDecl *>;
8707     SmallVector<MapData, 4> DeclComponentLists;
8708     assert(CurDir.is<const OMPExecutableDirective *>() &&
8709            "Expect a executable directive");
8710     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8711     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8712       for (const auto L : C->decl_component_lists(VD)) {
8713         const ValueDecl *VDecl, *Mapper;
8714         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8715         std::tie(VDecl, Components, Mapper) = L;
8716         assert(VDecl == VD && "We got information for the wrong declaration??");
8717         assert(!Components.empty() &&
8718                "Not expecting declaration with no component lists.");
8719         DeclComponentLists.emplace_back(Components, C->getMapType(),
8720                                         C->getMapTypeModifiers(),
8721                                         C->isImplicit(), Mapper);
8722       }
8723     }
8724 
8725     // Find overlapping elements (including the offset from the base element).
8726     llvm::SmallDenseMap<
8727         const MapData *,
8728         llvm::SmallVector<
8729             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8730         4>
8731         OverlappedData;
8732     size_t Count = 0;
8733     for (const MapData &L : DeclComponentLists) {
8734       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8735       OpenMPMapClauseKind MapType;
8736       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8737       bool IsImplicit;
8738       const ValueDecl *Mapper;
8739       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8740       ++Count;
8741       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8742         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8743         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
8744         auto CI = Components.rbegin();
8745         auto CE = Components.rend();
8746         auto SI = Components1.rbegin();
8747         auto SE = Components1.rend();
8748         for (; CI != CE && SI != SE; ++CI, ++SI) {
8749           if (CI->getAssociatedExpression()->getStmtClass() !=
8750               SI->getAssociatedExpression()->getStmtClass())
8751             break;
8752           // Are we dealing with different variables/fields?
8753           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8754             break;
8755         }
8756         // Found overlapping if, at least for one component, reached the head of
8757         // the components list.
8758         if (CI == CE || SI == SE) {
8759           assert((CI != CE || SI != SE) &&
8760                  "Unexpected full match of the mapping components.");
8761           const MapData &BaseData = CI == CE ? L : L1;
8762           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8763               SI == SE ? Components : Components1;
8764           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8765           OverlappedElements.getSecond().push_back(SubData);
8766         }
8767       }
8768     }
8769     // Sort the overlapped elements for each item.
8770     llvm::SmallVector<const FieldDecl *, 4> Layout;
8771     if (!OverlappedData.empty()) {
8772       if (const auto *CRD =
8773               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8774         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8775       else {
8776         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8777         Layout.append(RD->field_begin(), RD->field_end());
8778       }
8779     }
8780     for (auto &Pair : OverlappedData) {
8781       llvm::sort(
8782           Pair.getSecond(),
8783           [&Layout](
8784               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8785               OMPClauseMappableExprCommon::MappableExprComponentListRef
8786                   Second) {
8787             auto CI = First.rbegin();
8788             auto CE = First.rend();
8789             auto SI = Second.rbegin();
8790             auto SE = Second.rend();
8791             for (; CI != CE && SI != SE; ++CI, ++SI) {
8792               if (CI->getAssociatedExpression()->getStmtClass() !=
8793                   SI->getAssociatedExpression()->getStmtClass())
8794                 break;
8795               // Are we dealing with different variables/fields?
8796               if (CI->getAssociatedDeclaration() !=
8797                   SI->getAssociatedDeclaration())
8798                 break;
8799             }
8800 
8801             // Lists contain the same elements.
8802             if (CI == CE && SI == SE)
8803               return false;
8804 
8805             // List with less elements is less than list with more elements.
8806             if (CI == CE || SI == SE)
8807               return CI == CE;
8808 
8809             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8810             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8811             if (FD1->getParent() == FD2->getParent())
8812               return FD1->getFieldIndex() < FD2->getFieldIndex();
8813             const auto It =
8814                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8815                   return FD == FD1 || FD == FD2;
8816                 });
8817             return *It == FD1;
8818           });
8819     }
8820 
8821     // Associated with a capture, because the mapping flags depend on it.
8822     // Go through all of the elements with the overlapped elements.
8823     for (const auto &Pair : OverlappedData) {
8824       const MapData &L = *Pair.getFirst();
8825       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8826       OpenMPMapClauseKind MapType;
8827       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8828       bool IsImplicit;
8829       const ValueDecl *Mapper;
8830       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8831       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8832           OverlappedComponents = Pair.getSecond();
8833       bool IsFirstComponentList = true;
8834       generateInfoForComponentList(
8835           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
8836           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8837           /*ForDeviceAddr=*/false, OverlappedComponents);
8838     }
8839     // Go through other elements without overlapped elements.
8840     bool IsFirstComponentList = OverlappedData.empty();
8841     for (const MapData &L : DeclComponentLists) {
8842       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8843       OpenMPMapClauseKind MapType;
8844       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8845       bool IsImplicit;
8846       const ValueDecl *Mapper;
8847       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8848       auto It = OverlappedData.find(&L);
8849       if (It == OverlappedData.end())
8850         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
8851                                      Components, CombinedInfo, PartialStruct,
8852                                      IsFirstComponentList, IsImplicit, Mapper);
8853       IsFirstComponentList = false;
8854     }
8855   }
8856 
8857   /// Generate the default map information for a given capture \a CI,
8858   /// record field declaration \a RI and captured value \a CV.
8859   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8860                               const FieldDecl &RI, llvm::Value *CV,
8861                               MapCombinedInfoTy &CombinedInfo) const {
8862     bool IsImplicit = true;
8863     // Do the default mapping.
8864     if (CI.capturesThis()) {
8865       CombinedInfo.BasePointers.push_back(CV);
8866       CombinedInfo.Pointers.push_back(CV);
8867       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8868       CombinedInfo.Sizes.push_back(
8869           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8870                                     CGF.Int64Ty, /*isSigned=*/true));
8871       // Default map type.
8872       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8873     } else if (CI.capturesVariableByCopy()) {
8874       CombinedInfo.BasePointers.push_back(CV);
8875       CombinedInfo.Pointers.push_back(CV);
8876       if (!RI.getType()->isAnyPointerType()) {
8877         // We have to signal to the runtime captures passed by value that are
8878         // not pointers.
8879         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
8880         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8881             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8882       } else {
8883         // Pointers are implicitly mapped with a zero size and no flags
8884         // (other than first map that is added for all implicit maps).
8885         CombinedInfo.Types.push_back(OMP_MAP_NONE);
8886         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8887       }
8888       const VarDecl *VD = CI.getCapturedVar();
8889       auto I = FirstPrivateDecls.find(VD);
8890       if (I != FirstPrivateDecls.end())
8891         IsImplicit = I->getSecond();
8892     } else {
8893       assert(CI.capturesVariable() && "Expected captured reference.");
8894       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8895       QualType ElementType = PtrTy->getPointeeType();
8896       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8897           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8898       // The default map type for a scalar/complex type is 'to' because by
8899       // default the value doesn't have to be retrieved. For an aggregate
8900       // type, the default is 'tofrom'.
8901       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8902       const VarDecl *VD = CI.getCapturedVar();
8903       auto I = FirstPrivateDecls.find(VD);
8904       if (I != FirstPrivateDecls.end() &&
8905           VD->getType().isConstant(CGF.getContext())) {
8906         llvm::Constant *Addr =
8907             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8908         // Copy the value of the original variable to the new global copy.
8909         CGF.Builder.CreateMemCpy(
8910             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8911             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8912             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
8913         // Use new global variable as the base pointers.
8914         CombinedInfo.BasePointers.push_back(Addr);
8915         CombinedInfo.Pointers.push_back(Addr);
8916       } else {
8917         CombinedInfo.BasePointers.push_back(CV);
8918         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8919           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8920               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8921               AlignmentSource::Decl));
8922           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8923         } else {
8924           CombinedInfo.Pointers.push_back(CV);
8925         }
8926       }
8927       if (I != FirstPrivateDecls.end())
8928         IsImplicit = I->getSecond();
8929     }
8930     // Every default map produces a single argument which is a target parameter.
8931     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
8932 
8933     // Add flag stating this is an implicit map.
8934     if (IsImplicit)
8935       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
8936 
8937     // No user-defined mapper for default mapping.
8938     CombinedInfo.Mappers.push_back(nullptr);
8939   }
8940 };
8941 } // anonymous namespace
8942 
8943 static void emitNonContiguousDescriptor(
8944     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8945     CGOpenMPRuntime::TargetDataInfo &Info) {
8946   CodeGenModule &CGM = CGF.CGM;
8947   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
8948       &NonContigInfo = CombinedInfo.NonContigInfo;
8949 
8950   // Build an array of struct descriptor_dim and then assign it to
8951   // offload_args.
8952   //
8953   // struct descriptor_dim {
8954   //  uint64_t offset;
8955   //  uint64_t count;
8956   //  uint64_t stride
8957   // };
8958   ASTContext &C = CGF.getContext();
8959   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
8960   RecordDecl *RD;
8961   RD = C.buildImplicitRecord("descriptor_dim");
8962   RD->startDefinition();
8963   addFieldToRecordDecl(C, RD, Int64Ty);
8964   addFieldToRecordDecl(C, RD, Int64Ty);
8965   addFieldToRecordDecl(C, RD, Int64Ty);
8966   RD->completeDefinition();
8967   QualType DimTy = C.getRecordType(RD);
8968 
8969   enum { OffsetFD = 0, CountFD, StrideFD };
8970   // We need two index variable here since the size of "Dims" is the same as the
8971   // size of Components, however, the size of offset, count, and stride is equal
8972   // to the size of base declaration that is non-contiguous.
8973   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
8974     // Skip emitting ir if dimension size is 1 since it cannot be
8975     // non-contiguous.
8976     if (NonContigInfo.Dims[I] == 1)
8977       continue;
8978     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
8979     QualType ArrayTy =
8980         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
8981     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
8982     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
8983       unsigned RevIdx = EE - II - 1;
8984       LValue DimsLVal = CGF.MakeAddrLValue(
8985           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
8986       // Offset
8987       LValue OffsetLVal = CGF.EmitLValueForField(
8988           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
8989       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
8990       // Count
8991       LValue CountLVal = CGF.EmitLValueForField(
8992           DimsLVal, *std::next(RD->field_begin(), CountFD));
8993       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
8994       // Stride
8995       LValue StrideLVal = CGF.EmitLValueForField(
8996           DimsLVal, *std::next(RD->field_begin(), StrideFD));
8997       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
8998     }
8999     // args[I] = &dims
9000     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9001         DimsAddr, CGM.Int8PtrTy);
9002     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9003         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9004         Info.PointersArray, 0, I);
9005     Address PAddr(P, CGF.getPointerAlign());
9006     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9007     ++L;
9008   }
9009 }
9010 
9011 /// Emit the arrays used to pass the captures and map information to the
9012 /// offloading runtime library. If there is no map or capture information,
9013 /// return nullptr by reference.
9014 static void
9015 emitOffloadingArrays(CodeGenFunction &CGF,
9016                      MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9017                      CGOpenMPRuntime::TargetDataInfo &Info,
9018                      bool IsNonContiguous = false) {
9019   CodeGenModule &CGM = CGF.CGM;
9020   ASTContext &Ctx = CGF.getContext();
9021 
9022   // Reset the array information.
9023   Info.clearArrayInfo();
9024   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9025 
9026   if (Info.NumberOfPtrs) {
9027     // Detect if we have any capture size requiring runtime evaluation of the
9028     // size so that a constant array could be eventually used.
9029     bool hasRuntimeEvaluationCaptureSize = false;
9030     for (llvm::Value *S : CombinedInfo.Sizes)
9031       if (!isa<llvm::Constant>(S)) {
9032         hasRuntimeEvaluationCaptureSize = true;
9033         break;
9034       }
9035 
9036     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9037     QualType PointerArrayType = Ctx.getConstantArrayType(
9038         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9039         /*IndexTypeQuals=*/0);
9040 
9041     Info.BasePointersArray =
9042         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9043     Info.PointersArray =
9044         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9045     Address MappersArray =
9046         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9047     Info.MappersArray = MappersArray.getPointer();
9048 
9049     // If we don't have any VLA types or other types that require runtime
9050     // evaluation, we can use a constant array for the map sizes, otherwise we
9051     // need to fill up the arrays as we do for the pointers.
9052     QualType Int64Ty =
9053         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9054     if (hasRuntimeEvaluationCaptureSize) {
9055       QualType SizeArrayType = Ctx.getConstantArrayType(
9056           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9057           /*IndexTypeQuals=*/0);
9058       Info.SizesArray =
9059           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9060     } else {
9061       // We expect all the sizes to be constant, so we collect them to create
9062       // a constant array.
9063       SmallVector<llvm::Constant *, 16> ConstSizes;
9064       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9065         if (IsNonContiguous &&
9066             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9067           ConstSizes.push_back(llvm::ConstantInt::get(
9068               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9069         } else {
9070           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9071         }
9072       }
9073 
9074       auto *SizesArrayInit = llvm::ConstantArray::get(
9075           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9076       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9077       auto *SizesArrayGbl = new llvm::GlobalVariable(
9078           CGM.getModule(), SizesArrayInit->getType(),
9079           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9080           SizesArrayInit, Name);
9081       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9082       Info.SizesArray = SizesArrayGbl;
9083     }
9084 
9085     // The map types are always constant so we don't need to generate code to
9086     // fill arrays. Instead, we create an array constant.
9087     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9088     llvm::copy(CombinedInfo.Types, Mapping.begin());
9089     llvm::Constant *MapTypesArrayInit =
9090         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9091     std::string MaptypesName =
9092         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9093     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9094         CGM.getModule(), MapTypesArrayInit->getType(),
9095         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9096         MapTypesArrayInit, MaptypesName);
9097     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9098     Info.MapTypesArray = MapTypesArrayGbl;
9099 
9100     // If there's a present map type modifier, it must not be applied to the end
9101     // of a region, so generate a separate map type array in that case.
9102     if (Info.separateBeginEndCalls()) {
9103       bool EndMapTypesDiffer = false;
9104       for (uint64_t &Type : Mapping) {
9105         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9106           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9107           EndMapTypesDiffer = true;
9108         }
9109       }
9110       if (EndMapTypesDiffer) {
9111         MapTypesArrayInit =
9112             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9113         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9114         MapTypesArrayGbl = new llvm::GlobalVariable(
9115             CGM.getModule(), MapTypesArrayInit->getType(),
9116             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9117             MapTypesArrayInit, MaptypesName);
9118         MapTypesArrayGbl->setUnnamedAddr(
9119             llvm::GlobalValue::UnnamedAddr::Global);
9120         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9121       }
9122     }
9123 
9124     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9125       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9126       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9127           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9128           Info.BasePointersArray, 0, I);
9129       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9130           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9131       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9132       CGF.Builder.CreateStore(BPVal, BPAddr);
9133 
9134       if (Info.requiresDevicePointerInfo())
9135         if (const ValueDecl *DevVD =
9136                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9137           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9138 
9139       llvm::Value *PVal = CombinedInfo.Pointers[I];
9140       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9141           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9142           Info.PointersArray, 0, I);
9143       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9144           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9145       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9146       CGF.Builder.CreateStore(PVal, PAddr);
9147 
9148       if (hasRuntimeEvaluationCaptureSize) {
9149         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9150             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9151             Info.SizesArray,
9152             /*Idx0=*/0,
9153             /*Idx1=*/I);
9154         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9155         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9156                                                           CGM.Int64Ty,
9157                                                           /*isSigned=*/true),
9158                                 SAddr);
9159       }
9160 
9161       // Fill up the mapper array.
9162       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9163       if (CombinedInfo.Mappers[I]) {
9164         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9165             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9166         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9167         Info.HasMapper = true;
9168       }
9169       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9170       CGF.Builder.CreateStore(MFunc, MAddr);
9171     }
9172   }
9173 
9174   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9175       Info.NumberOfPtrs == 0)
9176     return;
9177 
9178   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9179 }
9180 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  ArgumentsOptions() = default;
  /// Not explicit, so a bool converts implicitly to ArgumentsOptions.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}

  /// Request the map types meant for the end of the region rather than the
  /// ones for its beginning.
  bool ForEndCall = false;
};
} // namespace
9189 
9190 /// Emit the arguments to be passed to the runtime library based on the
9191 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9192 /// ForEndCall, emit map types to be passed for the end of the region instead of
9193 /// the beginning.
9194 static void emitOffloadingArraysArgument(
9195     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9196     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9197     llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
9198     CGOpenMPRuntime::TargetDataInfo &Info,
9199     const ArgumentsOptions &Options = ArgumentsOptions()) {
9200   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9201          "expected region end call to runtime only when end call is separate");
9202   CodeGenModule &CGM = CGF.CGM;
9203   if (Info.NumberOfPtrs) {
9204     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9205         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9206         Info.BasePointersArray,
9207         /*Idx0=*/0, /*Idx1=*/0);
9208     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9209         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9210         Info.PointersArray,
9211         /*Idx0=*/0,
9212         /*Idx1=*/0);
9213     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9214         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9215         /*Idx0=*/0, /*Idx1=*/0);
9216     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9217         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9218         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9219                                                     : Info.MapTypesArray,
9220         /*Idx0=*/0,
9221         /*Idx1=*/0);
9222     // If there is no user-defined mapper, set the mapper array to nullptr to
9223     // avoid an unnecessary data privatization
9224     if (!Info.HasMapper)
9225       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9226     else
9227       MappersArrayArg =
9228           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9229   } else {
9230     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9231     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9232     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9233     MapTypesArrayArg =
9234         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9235     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9236   }
9237 }
9238 
9239 /// Check for inner distribute directive.
9240 static const OMPExecutableDirective *
9241 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9242   const auto *CS = D.getInnermostCapturedStmt();
9243   const auto *Body =
9244       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9245   const Stmt *ChildStmt =
9246       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9247 
9248   if (const auto *NestedDir =
9249           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9250     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9251     switch (D.getDirectiveKind()) {
9252     case OMPD_target:
9253       if (isOpenMPDistributeDirective(DKind))
9254         return NestedDir;
9255       if (DKind == OMPD_teams) {
9256         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9257             /*IgnoreCaptured=*/true);
9258         if (!Body)
9259           return nullptr;
9260         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9261         if (const auto *NND =
9262                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9263           DKind = NND->getDirectiveKind();
9264           if (isOpenMPDistributeDirective(DKind))
9265             return NND;
9266         }
9267       }
9268       return nullptr;
9269     case OMPD_target_teams:
9270       if (isOpenMPDistributeDirective(DKind))
9271         return NestedDir;
9272       return nullptr;
9273     case OMPD_target_parallel:
9274     case OMPD_target_simd:
9275     case OMPD_target_parallel_for:
9276     case OMPD_target_parallel_for_simd:
9277       return nullptr;
9278     case OMPD_target_teams_distribute:
9279     case OMPD_target_teams_distribute_simd:
9280     case OMPD_target_teams_distribute_parallel_for:
9281     case OMPD_target_teams_distribute_parallel_for_simd:
9282     case OMPD_parallel:
9283     case OMPD_for:
9284     case OMPD_parallel_for:
9285     case OMPD_parallel_master:
9286     case OMPD_parallel_sections:
9287     case OMPD_for_simd:
9288     case OMPD_parallel_for_simd:
9289     case OMPD_cancel:
9290     case OMPD_cancellation_point:
9291     case OMPD_ordered:
9292     case OMPD_threadprivate:
9293     case OMPD_allocate:
9294     case OMPD_task:
9295     case OMPD_simd:
9296     case OMPD_sections:
9297     case OMPD_section:
9298     case OMPD_single:
9299     case OMPD_master:
9300     case OMPD_critical:
9301     case OMPD_taskyield:
9302     case OMPD_barrier:
9303     case OMPD_taskwait:
9304     case OMPD_taskgroup:
9305     case OMPD_atomic:
9306     case OMPD_flush:
9307     case OMPD_depobj:
9308     case OMPD_scan:
9309     case OMPD_teams:
9310     case OMPD_target_data:
9311     case OMPD_target_exit_data:
9312     case OMPD_target_enter_data:
9313     case OMPD_distribute:
9314     case OMPD_distribute_simd:
9315     case OMPD_distribute_parallel_for:
9316     case OMPD_distribute_parallel_for_simd:
9317     case OMPD_teams_distribute:
9318     case OMPD_teams_distribute_simd:
9319     case OMPD_teams_distribute_parallel_for:
9320     case OMPD_teams_distribute_parallel_for_simd:
9321     case OMPD_target_update:
9322     case OMPD_declare_simd:
9323     case OMPD_declare_variant:
9324     case OMPD_begin_declare_variant:
9325     case OMPD_end_declare_variant:
9326     case OMPD_declare_target:
9327     case OMPD_end_declare_target:
9328     case OMPD_declare_reduction:
9329     case OMPD_declare_mapper:
9330     case OMPD_taskloop:
9331     case OMPD_taskloop_simd:
9332     case OMPD_master_taskloop:
9333     case OMPD_master_taskloop_simd:
9334     case OMPD_parallel_master_taskloop:
9335     case OMPD_parallel_master_taskloop_simd:
9336     case OMPD_requires:
9337     case OMPD_unknown:
9338     default:
9339       llvm_unreachable("Unexpected directive.");
9340     }
9341   }
9342 
9343   return nullptr;
9344 }
9345 
9346 /// Emit the user-defined mapper function. The code generation follows the
9347 /// pattern in the example below.
9348 /// \code
9349 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9350 ///                                           void *base, void *begin,
9351 ///                                           int64_t size, int64_t type) {
9352 ///   // Allocate space for an array section first.
9353 ///   if (size > 1 && !maptype.IsDelete)
9354 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9355 ///                                 size*sizeof(Ty), clearToFrom(type));
9356 ///   // Map members.
9357 ///   for (unsigned i = 0; i < size; i++) {
9358 ///     // For each component specified by this mapper:
9359 ///     for (auto c : all_components) {
9360 ///       if (c.hasMapper())
9361 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9362 ///                       c.arg_type);
9363 ///       else
9364 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9365 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9366 ///     }
9367 ///   }
9368 ///   // Delete the array section.
9369 ///   if (size > 1 && maptype.IsDelete)
9370 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9371 ///                                 size*sizeof(Ty), clearToFrom(type));
9372 /// }
9373 /// \endcode
     ///
     /// NOTE(review): the sketch above tests `size > 1`, whereas
     /// emitUDMapperArrayInitOrDel emits a signed `Size >= 1` comparison on the
     /// element count -- confirm which of the two is intended.
     ///
     /// \param D   The 'declare mapper' declaration to emit a function for. Nothing
     ///            is emitted if \p D already has an entry in UDMMap.
     /// \param CGF If non-null, \p D is additionally appended to the FunctionUDMMap
     ///            entry keyed by \p CGF->CurFn.
9374 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9375                                             CodeGenFunction *CGF) {
       // Each mapper declaration is emitted at most once.
9376   if (UDMMap.count(D) > 0)
9377     return;
9378   ASTContext &C = CGM.getContext();
9379   QualType Ty = D->getType();
9380   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9381   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9382   auto *MapperVarDecl =
9383       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9384   SourceLocation Loc = D->getLocation();
9385   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9386
9387   // Prepare mapper function arguments and attributes.
       // The five implicit parameters are, in order: runtime mapper handle, base
       // pointer, begin pointer, size and map type.
9388   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9389                               C.VoidPtrTy, ImplicitParamDecl::Other);
9390   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9391                             ImplicitParamDecl::Other);
9392   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9393                              C.VoidPtrTy, ImplicitParamDecl::Other);
9394   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9395                             ImplicitParamDecl::Other);
9396   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9397                             ImplicitParamDecl::Other);
9398   FunctionArgList Args;
9399   Args.push_back(&HandleArg);
9400   Args.push_back(&BaseArg);
9401   Args.push_back(&BeginArg);
9402   Args.push_back(&SizeArg);
9403   Args.push_back(&TypeArg);
9404   const CGFunctionInfo &FnInfo =
9405       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9406   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
       // The function name is built by getName() from "omp_mapper", the mangled
       // name of the mapped type and the name of the mapper declaration.
9407   SmallString<64> TyStr;
9408   llvm::raw_svector_ostream Out(TyStr);
9409   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9410   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9411   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9412                                     Name, &CGM.getModule());
9413   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9414   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9415   // Start the mapper function code generation.
9416   CodeGenFunction MapperCGF(CGM);
9417   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9418   // Compute the starting and end addresses of array elements.
9419   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9420       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9421       C.getPointerType(Int64Ty), Loc);
9422   // Convert the size in bytes into the number of array elements.
9423   Size = MapperCGF.Builder.CreateExactUDiv(
9424       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9425   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9426       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9427       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9428   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9429   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9430       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9431       C.getPointerType(Int64Ty), Loc);
9432   // Prepare common arguments for array initiation and deletion.
9433   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9434       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9435       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9436   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9437       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9438       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9439   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9440       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9441       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9442
9443   // Emit array initiation if this is an array section and \p MapType indicates
9444   // that memory allocation is required.
9445   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9446   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9447                              ElementSize, HeadBB, /*IsInit=*/true);
9448
9449   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9450
9451   // Emit the loop header block.
9452   MapperCGF.EmitBlock(HeadBB);
9453   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9454   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9455   // Evaluate whether the initial condition is satisfied.
9456   llvm::Value *IsEmpty =
9457       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9458   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
       // Remember the block holding the branch above: it is the loop PHI's
       // incoming edge from outside the loop.
9459   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9460
9461   // Emit the loop body block.
9462   MapperCGF.EmitBlock(BodyBB);
       // LastBB tracks the block that will hold the loop back-edge; it is updated
       // to the final block emitted for each component below.
9463   llvm::BasicBlock *LastBB = BodyBB;
9464   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9465       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9466   PtrPHI->addIncoming(PtrBegin, EntryBB);
9467   Address PtrCurrent =
9468       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9469                           .getAlignment()
9470                           .alignmentOfArrayElement(ElementSize));
9471   // Privatize the declared variable of mapper to be the current array element.
9472   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9473   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9474     return MapperCGF
9475         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9476         .getAddress(MapperCGF);
9477   });
9478   (void)Scope.Privatize();
9479
9480   // Get map clause information. Fill up the arrays with all mapped variables.
9481   MappableExprsHandler::MapCombinedInfoTy Info;
9482   MappableExprsHandler MEHandler(*D, MapperCGF);
9483   MEHandler.generateAllInfoForMapper(Info);
9484
9485   // Call the runtime API __tgt_mapper_num_components to get the number of
9486   // pre-existing components.
9487   llvm::Value *OffloadingArgs[] = {Handle};
9488   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9489       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9490                                             OMPRTL___tgt_mapper_num_components),
9491       OffloadingArgs);
       // Shift the count to the bit offset returned by getFlagMemberOffset() so it
       // can be added to the MEMBER_OF field of a map type below.
9492   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9493       PreviousSize,
9494       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9495
9496   // Fill up the runtime mapper handle for all components.
9497   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9498     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9499         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9500     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9501         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9502     llvm::Value *CurSizeArg = Info.Sizes[I];
9503
9504     // Extract the MEMBER_OF field from the map type.
9505     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9506     MapperCGF.EmitBlock(MemberBB);
9507     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9508     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9509         OriMapType,
9510         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9511     llvm::BasicBlock *MemberCombineBB =
9512         MapperCGF.createBasicBlock("omp.member.combine");
9513     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
         // NOTE: despite its name, IsMember is true when the MEMBER_OF bits are all
         // zero; in that case the combine block is skipped.
9514     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9515     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9516     // Add the number of pre-existing components to the MEMBER_OF field if it
9517     // is valid.
9518     MapperCGF.EmitBlock(MemberCombineBB);
9519     llvm::Value *CombinedMember =
9520         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9521     // Do nothing if it is not a member of previous components.
9522     MapperCGF.EmitBlock(TypeBB);
9523     llvm::PHINode *MemberMapType =
9524         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9525     MemberMapType->addIncoming(OriMapType, MemberBB);
9526     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9527
9528     // Combine the map type inherited from user-defined mapper with that
9529     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9530     // bits of the \a MapType, which is the input argument of the mapper
9531     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9532     // bits of MemberMapType.
9533     // [OpenMP 5.0], 1.2.6. map-type decay.
9534     //        | alloc |  to   | from  | tofrom | release | delete
9535     // ----------------------------------------------------------
9536     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9537     // to     | alloc |  to   | alloc |   to   | release | delete
9538     // from   | alloc | alloc | from  |  from  | release | delete
9539     // tofrom | alloc |  to   | from  | tofrom | release | delete
9540     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9541         MapType,
9542         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9543                                    MappableExprsHandler::OMP_MAP_FROM));
9544     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9545     llvm::BasicBlock *AllocElseBB =
9546         MapperCGF.createBasicBlock("omp.type.alloc.else");
9547     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9548     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9549     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9550     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9551     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9552     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9553     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9554     MapperCGF.EmitBlock(AllocBB);
9555     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9556         MemberMapType,
9557         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9558                                      MappableExprsHandler::OMP_MAP_FROM)));
9559     MapperCGF.Builder.CreateBr(EndBB);
9560     MapperCGF.EmitBlock(AllocElseBB);
9561     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9562         LeftToFrom,
9563         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9564     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9565     // In case of to, clear OMP_MAP_FROM.
9566     MapperCGF.EmitBlock(ToBB);
9567     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9568         MemberMapType,
9569         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9570     MapperCGF.Builder.CreateBr(EndBB);
9571     MapperCGF.EmitBlock(ToElseBB);
9572     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9573         LeftToFrom,
9574         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9575     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9576     // In case of from, clear OMP_MAP_TO.
9577     MapperCGF.EmitBlock(FromBB);
9578     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9579         MemberMapType,
9580         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9581     // In case of tofrom, do nothing.
9582     MapperCGF.EmitBlock(EndBB);
9583     LastBB = EndBB;
         // Merge the four outcomes; the edge from ToElseBB is the tofrom case and
         // carries MemberMapType unchanged.
9584     llvm::PHINode *CurMapType =
9585         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9586     CurMapType->addIncoming(AllocMapType, AllocBB);
9587     CurMapType->addIncoming(ToMapType, ToBB);
9588     CurMapType->addIncoming(FromMapType, FromBB);
9589     CurMapType->addIncoming(MemberMapType, ToElseBB);
9590
9591     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9592                                      CurSizeArg, CurMapType};
9593     if (Info.Mappers[I]) {
9594       // Call the corresponding mapper function.
9595       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9596           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9597       assert(MapperFunc && "Expect a valid mapper function is available.");
9598       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9599     } else {
9600       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9601       // data structure.
9602       MapperCGF.EmitRuntimeCall(
9603           OMPBuilder.getOrCreateRuntimeFunction(
9604               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9605           OffloadingArgs);
9606     }
9607   }
9608
9609   // Update the pointer to point to the next element that needs to be mapped,
9610   // and check whether we have mapped all elements.
9611   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9612       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
       // Loop back-edge: the PHI receives the advanced pointer from LastBB.
9613   PtrPHI->addIncoming(PtrNext, LastBB);
9614   llvm::Value *IsDone =
9615       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9616   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9617   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9618
9619   MapperCGF.EmitBlock(ExitBB);
9620   // Emit array deletion if this is an array section and \p MapType indicates
9621   // that deletion is required.
9622   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9623                              ElementSize, DoneBB, /*IsInit=*/false);
9624
9625   // Emit the function exit block.
9626   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9627   MapperCGF.FinishFunction();
       // Record the emitted function so later requests for \p D find it in UDMMap.
9628   UDMMap.try_emplace(D, Fn);
9629   if (CGF) {
9630     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9631     Decls.second.push_back(D);
9632   }
9633 }
9634 
9635 /// Emit the array initialization or deletion portion for user-defined mapper
9636 /// code generation. First, it evaluates whether an array section is mapped and
9637 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9638 /// true, and \a MapType indicates to not delete this array, array
9639 /// initialization code is generated. If \a IsInit is false, and \a MapType
9640 /// indicates to not this array, array deletion code is generated.
9641 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9642     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9643     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9644     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9645   StringRef Prefix = IsInit ? ".init" : ".del";
9646 
9647   // Evaluate if this is an array section.
9648   llvm::BasicBlock *IsDeleteBB =
9649       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9650   llvm::BasicBlock *BodyBB =
9651       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9652   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9653       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9654   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9655 
9656   // Evaluate if we are going to delete this section.
9657   MapperCGF.EmitBlock(IsDeleteBB);
9658   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9659       MapType,
9660       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9661   llvm::Value *DeleteCond;
9662   if (IsInit) {
9663     DeleteCond = MapperCGF.Builder.CreateIsNull(
9664         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9665   } else {
9666     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9667         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9668   }
9669   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9670 
9671   MapperCGF.EmitBlock(BodyBB);
9672   // Get the array size by multiplying element size and element number (i.e., \p
9673   // Size).
9674   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9675       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9676   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9677   // memory allocation/deletion purpose only.
9678   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9679       MapType,
9680       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9681                                    MappableExprsHandler::OMP_MAP_FROM)));
9682   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9683   // data structure.
9684   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9685   MapperCGF.EmitRuntimeCall(
9686       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9687                                             OMPRTL___tgt_push_mapper_component),
9688       OffloadingArgs);
9689 }
9690 
9691 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9692     const OMPDeclareMapperDecl *D) {
9693   auto I = UDMMap.find(D);
9694   if (I != UDMMap.end())
9695     return I->second;
9696   emitUserDefinedMapper(D);
9697   return UDMMap.lookup(D);
9698 }
9699 
9700 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9701     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9702     llvm::Value *DeviceID,
9703     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9704                                      const OMPLoopDirective &D)>
9705         SizeEmitter) {
9706   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9707   const OMPExecutableDirective *TD = &D;
9708   // Get nested teams distribute kind directive, if any.
9709   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9710     TD = getNestedDistributeDirective(CGM.getContext(), D);
9711   if (!TD)
9712     return;
9713   const auto *LD = cast<OMPLoopDirective>(TD);
9714   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9715                                                      PrePostActionTy &) {
9716     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9717       llvm::Value *Args[] = {DeviceID, NumIterations};
9718       CGF.EmitRuntimeCall(
9719           OMPBuilder.getOrCreateRuntimeFunction(
9720               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9721           Args);
9722     }
9723   };
9724   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9725 }
9726 
9727 void CGOpenMPRuntime::emitTargetCall(
9728     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9729     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9730     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9731     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9732                                      const OMPLoopDirective &D)>
9733         SizeEmitter) {
9734   if (!CGF.HaveInsertPoint())
9735     return;
9736 
9737   assert(OutlinedFn && "Invalid outlined function!");
9738 
9739   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9740                                  D.hasClausesOfKind<OMPNowaitClause>();
9741   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9742   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9743   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9744                                             PrePostActionTy &) {
9745     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9746   };
9747   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9748 
9749   CodeGenFunction::OMPTargetDataInfo InputInfo;
9750   llvm::Value *MapTypesArray = nullptr;
9751   // Fill up the pointer arrays and transfer execution to the device.
9752   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9753                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9754                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9755     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9756       // Reverse offloading is not supported, so just execute on the host.
9757       if (RequiresOuterTask) {
9758         CapturedVars.clear();
9759         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9760       }
9761       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9762       return;
9763     }
9764 
9765     // On top of the arrays that were filled up, the target offloading call
9766     // takes as arguments the device id as well as the host pointer. The host
9767     // pointer is used by the runtime library to identify the current target
9768     // region, so it only has to be unique and not necessarily point to
9769     // anything. It could be the pointer to the outlined function that
9770     // implements the target region, but we aren't using that so that the
9771     // compiler doesn't need to keep that, and could therefore inline the host
9772     // function if proven worthwhile during optimization.
9773 
9774     // From this point on, we need to have an ID of the target region defined.
9775     assert(OutlinedFnID && "Invalid outlined function ID!");
9776 
9777     // Emit device ID if any.
9778     llvm::Value *DeviceID;
9779     if (Device.getPointer()) {
9780       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9781               Device.getInt() == OMPC_DEVICE_device_num) &&
9782              "Expected device_num modifier.");
9783       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9784       DeviceID =
9785           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9786     } else {
9787       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9788     }
9789 
9790     // Emit the number of elements in the offloading arrays.
9791     llvm::Value *PointerNum =
9792         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9793 
9794     // Return value of the runtime offloading call.
9795     llvm::Value *Return;
9796 
9797     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9798     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9799 
9800     // Emit tripcount for the target loop-based directive.
9801     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9802 
9803     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9804     // The target region is an outlined function launched by the runtime
9805     // via calls __tgt_target() or __tgt_target_teams().
9806     //
9807     // __tgt_target() launches a target region with one team and one thread,
9808     // executing a serial region.  This master thread may in turn launch
9809     // more threads within its team upon encountering a parallel region,
9810     // however, no additional teams can be launched on the device.
9811     //
9812     // __tgt_target_teams() launches a target region with one or more teams,
9813     // each with one or more threads.  This call is required for target
9814     // constructs such as:
9815     //  'target teams'
9816     //  'target' / 'teams'
9817     //  'target teams distribute parallel for'
9818     //  'target parallel'
9819     // and so on.
9820     //
9821     // Note that on the host and CPU targets, the runtime implementation of
9822     // these calls simply call the outlined function without forking threads.
9823     // The outlined functions themselves have runtime calls to
9824     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9825     // the compiler in emitTeamsCall() and emitParallelCall().
9826     //
9827     // In contrast, on the NVPTX target, the implementation of
9828     // __tgt_target_teams() launches a GPU kernel with the requested number
9829     // of teams and threads so no additional calls to the runtime are required.
9830     if (NumTeams) {
9831       // If we have NumTeams defined this means that we have an enclosed teams
9832       // region. Therefore we also expect to have NumThreads defined. These two
9833       // values should be defined in the presence of a teams directive,
9834       // regardless of having any clauses associated. If the user is using teams
9835       // but no clauses, these two values will be the default that should be
9836       // passed to the runtime library - a 32-bit integer with the value zero.
9837       assert(NumThreads && "Thread limit expression should be available along "
9838                            "with number of teams.");
9839       llvm::Value *OffloadingArgs[] = {DeviceID,
9840                                        OutlinedFnID,
9841                                        PointerNum,
9842                                        InputInfo.BasePointersArray.getPointer(),
9843                                        InputInfo.PointersArray.getPointer(),
9844                                        InputInfo.SizesArray.getPointer(),
9845                                        MapTypesArray,
9846                                        InputInfo.MappersArray.getPointer(),
9847                                        NumTeams,
9848                                        NumThreads};
9849       Return = CGF.EmitRuntimeCall(
9850           OMPBuilder.getOrCreateRuntimeFunction(
9851               CGM.getModule(), HasNowait
9852                                    ? OMPRTL___tgt_target_teams_nowait_mapper
9853                                    : OMPRTL___tgt_target_teams_mapper),
9854           OffloadingArgs);
9855     } else {
9856       llvm::Value *OffloadingArgs[] = {DeviceID,
9857                                        OutlinedFnID,
9858                                        PointerNum,
9859                                        InputInfo.BasePointersArray.getPointer(),
9860                                        InputInfo.PointersArray.getPointer(),
9861                                        InputInfo.SizesArray.getPointer(),
9862                                        MapTypesArray,
9863                                        InputInfo.MappersArray.getPointer()};
9864       Return = CGF.EmitRuntimeCall(
9865           OMPBuilder.getOrCreateRuntimeFunction(
9866               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
9867                                          : OMPRTL___tgt_target_mapper),
9868           OffloadingArgs);
9869     }
9870 
9871     // Check the error code and execute the host version if required.
9872     llvm::BasicBlock *OffloadFailedBlock =
9873         CGF.createBasicBlock("omp_offload.failed");
9874     llvm::BasicBlock *OffloadContBlock =
9875         CGF.createBasicBlock("omp_offload.cont");
9876     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9877     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9878 
9879     CGF.EmitBlock(OffloadFailedBlock);
9880     if (RequiresOuterTask) {
9881       CapturedVars.clear();
9882       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9883     }
9884     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9885     CGF.EmitBranch(OffloadContBlock);
9886 
9887     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9888   };
9889 
9890   // Notify that the host version must be executed.
9891   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9892                     RequiresOuterTask](CodeGenFunction &CGF,
9893                                        PrePostActionTy &) {
9894     if (RequiresOuterTask) {
9895       CapturedVars.clear();
9896       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9897     }
9898     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9899   };
9900 
9901   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9902                           &CapturedVars, RequiresOuterTask,
9903                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9904     // Fill up the arrays with all the captured variables.
9905     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9906 
9907     // Get mappable expression information.
9908     MappableExprsHandler MEHandler(D, CGF);
9909     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9910     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9911 
9912     auto RI = CS.getCapturedRecordDecl()->field_begin();
9913     auto CV = CapturedVars.begin();
9914     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9915                                               CE = CS.capture_end();
9916          CI != CE; ++CI, ++RI, ++CV) {
9917       MappableExprsHandler::MapCombinedInfoTy CurInfo;
9918       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9919 
9920       // VLA sizes are passed to the outlined region by copy and do not have map
9921       // information associated.
9922       if (CI->capturesVariableArrayType()) {
9923         CurInfo.BasePointers.push_back(*CV);
9924         CurInfo.Pointers.push_back(*CV);
9925         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9926             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9927         // Copy to the device as an argument. No need to retrieve it.
9928         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9929                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9930                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
9931         CurInfo.Mappers.push_back(nullptr);
9932       } else {
9933         // If we have any information in the map clause, we use it, otherwise we
9934         // just do a default mapping.
9935         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9936         if (!CI->capturesThis())
9937           MappedVarSet.insert(CI->getCapturedVar());
9938         else
9939           MappedVarSet.insert(nullptr);
9940         if (CurInfo.BasePointers.empty())
9941           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9942         // Generate correct mapping for variables captured by reference in
9943         // lambdas.
9944         if (CI->capturesVariable())
9945           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9946                                                   CurInfo, LambdaPointers);
9947       }
9948       // We expect to have at least an element of information for this capture.
9949       assert(!CurInfo.BasePointers.empty() &&
9950              "Non-existing map pointer for capture!");
9951       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9952              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9953              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9954              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9955              "Inconsistent map information sizes!");
9956 
9957       // If there is an entry in PartialStruct it means we have a struct with
9958       // individual members mapped. Emit an extra combined entry.
9959       if (PartialStruct.Base.isValid())
9960         MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
9961 
9962       // We need to append the results of this capture to what we already have.
9963       CombinedInfo.append(CurInfo);
9964     }
9965     // Adjust MEMBER_OF flags for the lambdas captures.
9966     MEHandler.adjustMemberOfForLambdaCaptures(
9967         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
9968         CombinedInfo.Types);
9969     // Map any list items in a map clause that were not captures because they
9970     // weren't referenced within the construct.
9971     MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
9972                               MappedVarSet);
9973 
9974     TargetDataInfo Info;
9975     // Fill up the arrays and create the arguments.
9976     emitOffloadingArrays(CGF, CombinedInfo, Info);
9977     emitOffloadingArraysArgument(
9978         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
9979         Info.MapTypesArray, Info.MappersArray, Info, {/*ForEndTask=*/false});
9980     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9981     InputInfo.BasePointersArray =
9982         Address(Info.BasePointersArray, CGM.getPointerAlign());
9983     InputInfo.PointersArray =
9984         Address(Info.PointersArray, CGM.getPointerAlign());
9985     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9986     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
9987     MapTypesArray = Info.MapTypesArray;
9988     if (RequiresOuterTask)
9989       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9990     else
9991       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9992   };
9993 
9994   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9995                              CodeGenFunction &CGF, PrePostActionTy &) {
9996     if (RequiresOuterTask) {
9997       CodeGenFunction::OMPTargetDataInfo InputInfo;
9998       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9999     } else {
10000       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10001     }
10002   };
10003 
10004   // If we have a target function ID it means that we need to support
10005   // offloading, otherwise, just execute on the host. We need to execute on host
10006   // regardless of the conditional in the if clause if, e.g., the user do not
10007   // specify target triples.
10008   if (OutlinedFnID) {
10009     if (IfCond) {
10010       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10011     } else {
10012       RegionCodeGenTy ThenRCG(TargetThenGen);
10013       ThenRCG(CGF);
10014     }
10015   } else {
10016     RegionCodeGenTy ElseRCG(TargetElseGen);
10017     ElseRCG(CGF);
10018   }
10019 }
10020 
10021 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10022                                                     StringRef ParentName) {
10023   if (!S)
10024     return;
10025 
10026   // Codegen OMP target directives that offload compute to the device.
10027   bool RequiresDeviceCodegen =
10028       isa<OMPExecutableDirective>(S) &&
10029       isOpenMPTargetExecutionDirective(
10030           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10031 
10032   if (RequiresDeviceCodegen) {
10033     const auto &E = *cast<OMPExecutableDirective>(S);
10034     unsigned DeviceID;
10035     unsigned FileID;
10036     unsigned Line;
10037     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10038                              FileID, Line);
10039 
10040     // Is this a target region that should not be emitted as an entry point? If
10041     // so just signal we are done with this target region.
10042     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10043                                                             ParentName, Line))
10044       return;
10045 
10046     switch (E.getDirectiveKind()) {
10047     case OMPD_target:
10048       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10049                                                    cast<OMPTargetDirective>(E));
10050       break;
10051     case OMPD_target_parallel:
10052       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10053           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10054       break;
10055     case OMPD_target_teams:
10056       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10057           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10058       break;
10059     case OMPD_target_teams_distribute:
10060       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10061           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10062       break;
10063     case OMPD_target_teams_distribute_simd:
10064       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10065           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10066       break;
10067     case OMPD_target_parallel_for:
10068       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10069           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10070       break;
10071     case OMPD_target_parallel_for_simd:
10072       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10073           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10074       break;
10075     case OMPD_target_simd:
10076       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10077           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10078       break;
10079     case OMPD_target_teams_distribute_parallel_for:
10080       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10081           CGM, ParentName,
10082           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10083       break;
10084     case OMPD_target_teams_distribute_parallel_for_simd:
10085       CodeGenFunction::
10086           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10087               CGM, ParentName,
10088               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10089       break;
10090     case OMPD_parallel:
10091     case OMPD_for:
10092     case OMPD_parallel_for:
10093     case OMPD_parallel_master:
10094     case OMPD_parallel_sections:
10095     case OMPD_for_simd:
10096     case OMPD_parallel_for_simd:
10097     case OMPD_cancel:
10098     case OMPD_cancellation_point:
10099     case OMPD_ordered:
10100     case OMPD_threadprivate:
10101     case OMPD_allocate:
10102     case OMPD_task:
10103     case OMPD_simd:
10104     case OMPD_sections:
10105     case OMPD_section:
10106     case OMPD_single:
10107     case OMPD_master:
10108     case OMPD_critical:
10109     case OMPD_taskyield:
10110     case OMPD_barrier:
10111     case OMPD_taskwait:
10112     case OMPD_taskgroup:
10113     case OMPD_atomic:
10114     case OMPD_flush:
10115     case OMPD_depobj:
10116     case OMPD_scan:
10117     case OMPD_teams:
10118     case OMPD_target_data:
10119     case OMPD_target_exit_data:
10120     case OMPD_target_enter_data:
10121     case OMPD_distribute:
10122     case OMPD_distribute_simd:
10123     case OMPD_distribute_parallel_for:
10124     case OMPD_distribute_parallel_for_simd:
10125     case OMPD_teams_distribute:
10126     case OMPD_teams_distribute_simd:
10127     case OMPD_teams_distribute_parallel_for:
10128     case OMPD_teams_distribute_parallel_for_simd:
10129     case OMPD_target_update:
10130     case OMPD_declare_simd:
10131     case OMPD_declare_variant:
10132     case OMPD_begin_declare_variant:
10133     case OMPD_end_declare_variant:
10134     case OMPD_declare_target:
10135     case OMPD_end_declare_target:
10136     case OMPD_declare_reduction:
10137     case OMPD_declare_mapper:
10138     case OMPD_taskloop:
10139     case OMPD_taskloop_simd:
10140     case OMPD_master_taskloop:
10141     case OMPD_master_taskloop_simd:
10142     case OMPD_parallel_master_taskloop:
10143     case OMPD_parallel_master_taskloop_simd:
10144     case OMPD_requires:
10145     case OMPD_unknown:
10146     default:
10147       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10148     }
10149     return;
10150   }
10151 
10152   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10153     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10154       return;
10155 
10156     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10157     return;
10158   }
10159 
10160   // If this is a lambda function, look into its body.
10161   if (const auto *L = dyn_cast<LambdaExpr>(S))
10162     S = L->getBody();
10163 
10164   // Keep looking for target regions recursively.
10165   for (const Stmt *II : S->children())
10166     scanForTargetRegionsFunctions(II, ParentName);
10167 }
10168 
10169 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10170   // If emitting code for the host, we do not process FD here. Instead we do
10171   // the normal code generation.
10172   if (!CGM.getLangOpts().OpenMPIsDevice) {
10173     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10174       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10175           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10176       // Do not emit device_type(nohost) functions for the host.
10177       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10178         return true;
10179     }
10180     return false;
10181   }
10182 
10183   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10184   // Try to detect target regions in the function.
10185   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10186     StringRef Name = CGM.getMangledName(GD);
10187     scanForTargetRegionsFunctions(FD->getBody(), Name);
10188     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10189         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10190     // Do not emit device_type(nohost) functions for the host.
10191     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10192       return true;
10193   }
10194 
10195   // Do not to emit function if it is not marked as declare target.
10196   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10197          AlreadyEmittedTargetDecls.count(VD) == 0;
10198 }
10199 
10200 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10201   if (!CGM.getLangOpts().OpenMPIsDevice)
10202     return false;
10203 
10204   // Check if there are Ctors/Dtors in this declaration and look for target
10205   // regions in it. We use the complete variant to produce the kernel name
10206   // mangling.
10207   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10208   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10209     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10210       StringRef ParentName =
10211           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10212       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10213     }
10214     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10215       StringRef ParentName =
10216           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10217       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10218     }
10219   }
10220 
10221   // Do not to emit variable if it is not marked as declare target.
10222   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10223       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10224           cast<VarDecl>(GD.getDecl()));
10225   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10226       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10227        HasRequiresUnifiedSharedMemory)) {
10228     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10229     return true;
10230   }
10231   return false;
10232 }
10233 
10234 llvm::Constant *
10235 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10236                                                 const VarDecl *VD) {
10237   assert(VD->getType().isConstant(CGM.getContext()) &&
10238          "Expected constant variable.");
10239   StringRef VarName;
10240   llvm::Constant *Addr;
10241   llvm::GlobalValue::LinkageTypes Linkage;
10242   QualType Ty = VD->getType();
10243   SmallString<128> Buffer;
10244   {
10245     unsigned DeviceID;
10246     unsigned FileID;
10247     unsigned Line;
10248     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10249                              FileID, Line);
10250     llvm::raw_svector_ostream OS(Buffer);
10251     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10252        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10253     VarName = OS.str();
10254   }
10255   Linkage = llvm::GlobalValue::InternalLinkage;
10256   Addr =
10257       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10258                                   getDefaultFirstprivateAddressSpace());
10259   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10260   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10261   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10262   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10263       VarName, Addr, VarSize,
10264       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10265   return Addr;
10266 }
10267 
10268 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10269                                                    llvm::Constant *Addr) {
10270   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10271       !CGM.getLangOpts().OpenMPIsDevice)
10272     return;
10273   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10274       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10275   if (!Res) {
10276     if (CGM.getLangOpts().OpenMPIsDevice) {
10277       // Register non-target variables being emitted in device code (debug info
10278       // may cause this).
10279       StringRef VarName = CGM.getMangledName(VD);
10280       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10281     }
10282     return;
10283   }
10284   // Register declare target variables.
10285   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10286   StringRef VarName;
10287   CharUnits VarSize;
10288   llvm::GlobalValue::LinkageTypes Linkage;
10289 
10290   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10291       !HasRequiresUnifiedSharedMemory) {
10292     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10293     VarName = CGM.getMangledName(VD);
10294     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10295       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10296       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10297     } else {
10298       VarSize = CharUnits::Zero();
10299     }
10300     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10301     // Temp solution to prevent optimizations of the internal variables.
10302     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10303       std::string RefName = getName({VarName, "ref"});
10304       if (!CGM.GetGlobalValue(RefName)) {
10305         llvm::Constant *AddrRef =
10306             getOrCreateInternalVariable(Addr->getType(), RefName);
10307         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10308         GVAddrRef->setConstant(/*Val=*/true);
10309         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10310         GVAddrRef->setInitializer(Addr);
10311         CGM.addCompilerUsedGlobal(GVAddrRef);
10312       }
10313     }
10314   } else {
10315     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10316             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10317              HasRequiresUnifiedSharedMemory)) &&
10318            "Declare target attribute must link or to with unified memory.");
10319     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10320       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10321     else
10322       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10323 
10324     if (CGM.getLangOpts().OpenMPIsDevice) {
10325       VarName = Addr->getName();
10326       Addr = nullptr;
10327     } else {
10328       VarName = getAddrOfDeclareTargetVar(VD).getName();
10329       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10330     }
10331     VarSize = CGM.getPointerSize();
10332     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10333   }
10334 
10335   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10336       VarName, Addr, VarSize, Flags, Linkage);
10337 }
10338 
10339 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10340   if (isa<FunctionDecl>(GD.getDecl()) ||
10341       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10342     return emitTargetFunctions(GD);
10343 
10344   return emitTargetGlobalVariable(GD);
10345 }
10346 
10347 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10348   for (const VarDecl *VD : DeferredGlobalVariables) {
10349     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10350         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10351     if (!Res)
10352       continue;
10353     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10354         !HasRequiresUnifiedSharedMemory) {
10355       CGM.EmitGlobal(VD);
10356     } else {
10357       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10358               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10359                HasRequiresUnifiedSharedMemory)) &&
10360              "Expected link clause or to clause with unified memory.");
10361       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10362     }
10363   }
10364 }
10365 
/// Hook for adjusting target-specific data for lambdas captured by the target
/// execution directive \p D.
///
/// This implementation adjusts nothing: it only checks, in builds with
/// assertions enabled, that \p D is a target execution directive.
/// NOTE(review): presumably device-specific runtimes override this - confirm
/// against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10371 
10372 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10373   for (const OMPClause *Clause : D->clauselists()) {
10374     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10375       HasRequiresUnifiedSharedMemory = true;
10376     } else if (const auto *AC =
10377                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10378       switch (AC->getAtomicDefaultMemOrderKind()) {
10379       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10380         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10381         break;
10382       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10383         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10384         break;
10385       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10386         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10387         break;
10388       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10389         break;
10390       }
10391     }
10392   }
10393 }
10394 
/// \returns the atomic ordering stored in RequiresAtomicOrdering, i.e. the
/// value last set by processRequiresDirective for an atomic_default_mem_order
/// clause (or whatever the member was initialized with otherwise).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10398 
10399 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10400                                                        LangAS &AS) {
10401   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10402     return false;
10403   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10404   switch(A->getAllocatorType()) {
10405   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10406   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10407   // Not supported, fallback to the default mem space.
10408   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10409   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10410   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10411   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10412   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10413   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10414   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10415     AS = LangAS::Default;
10416     return true;
10417   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10418     llvm_unreachable("Expected predefined allocator for the variables with the "
10419                      "static storage.");
10420   }
10421   return false;
10422 }
10423 
/// \returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it sees a unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10427 
10428 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10429     CodeGenModule &CGM)
10430     : CGM(CGM) {
10431   if (CGM.getLangOpts().OpenMPIsDevice) {
10432     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10433     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10434   }
10435 }
10436 
10437 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10438   if (CGM.getLangOpts().OpenMPIsDevice)
10439     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10440 }
10441 
10442 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10443   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10444     return true;
10445 
10446   const auto *D = cast<FunctionDecl>(GD.getDecl());
10447   // Do not to emit function if it is marked as declare target as it was already
10448   // emitted.
10449   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10450     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10451       if (auto *F = dyn_cast_or_null<llvm::Function>(
10452               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10453         return !F->isDeclaration();
10454       return false;
10455     }
10456     return true;
10457   }
10458 
10459   return !AlreadyEmittedTargetDecls.insert(D).second;
10460 }
10461 
10462 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10463   // If we don't have entries or if we are emitting code for the device, we
10464   // don't need to do anything.
10465   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10466       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10467       (OffloadEntriesInfoManager.empty() &&
10468        !HasEmittedDeclareTargetRegion &&
10469        !HasEmittedTargetRegion))
10470     return nullptr;
10471 
10472   // Create and register the function that handles the requires directives.
10473   ASTContext &C = CGM.getContext();
10474 
10475   llvm::Function *RequiresRegFn;
10476   {
10477     CodeGenFunction CGF(CGM);
10478     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10479     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10480     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10481     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10482     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10483     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10484     // TODO: check for other requires clauses.
10485     // The requires directive takes effect only when a target region is
10486     // present in the compilation unit. Otherwise it is ignored and not
10487     // passed to the runtime. This avoids the runtime from throwing an error
10488     // for mismatching requires clauses across compilation units that don't
10489     // contain at least 1 target region.
10490     assert((HasEmittedTargetRegion ||
10491             HasEmittedDeclareTargetRegion ||
10492             !OffloadEntriesInfoManager.empty()) &&
10493            "Target or declare target region expected.");
10494     if (HasRequiresUnifiedSharedMemory)
10495       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10496     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10497                             CGM.getModule(), OMPRTL___tgt_register_requires),
10498                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10499     CGF.FinishFunction();
10500   }
10501   return RequiresRegFn;
10502 }
10503 
10504 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10505                                     const OMPExecutableDirective &D,
10506                                     SourceLocation Loc,
10507                                     llvm::Function *OutlinedFn,
10508                                     ArrayRef<llvm::Value *> CapturedVars) {
10509   if (!CGF.HaveInsertPoint())
10510     return;
10511 
10512   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10513   CodeGenFunction::RunCleanupsScope Scope(CGF);
10514 
10515   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10516   llvm::Value *Args[] = {
10517       RTLoc,
10518       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10519       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10520   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10521   RealArgs.append(std::begin(Args), std::end(Args));
10522   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10523 
10524   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10525       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10526   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10527 }
10528 
10529 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10530                                          const Expr *NumTeams,
10531                                          const Expr *ThreadLimit,
10532                                          SourceLocation Loc) {
10533   if (!CGF.HaveInsertPoint())
10534     return;
10535 
10536   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10537 
10538   llvm::Value *NumTeamsVal =
10539       NumTeams
10540           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10541                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10542           : CGF.Builder.getInt32(0);
10543 
10544   llvm::Value *ThreadLimitVal =
10545       ThreadLimit
10546           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10547                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10548           : CGF.Builder.getInt32(0);
10549 
10550   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10551   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10552                                      ThreadLimitVal};
10553   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10554                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10555                       PushNumTeamsArgs);
10556 }
10557 
/// Emits the code for a target data region of directive \p D: a call to
/// __tgt_target_data_begin_mapper, the region body produced by \p CodeGen, and
/// a call to __tgt_target_data_end_mapper.
///
/// \param IfCond If non-null, both runtime calls are emitted through
/// emitIfClause under this condition.
/// \param Device If non-null, it is evaluated and cast to a signed 64-bit
/// device ID; otherwise OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator of the region body. Depending on
/// Info.CaptureDeviceAddrMap the body is emitted either once between the two
/// runtime calls, or inside the branches of the opening 'if'.
/// \param Info Filled in by emitOffloadingArrays when the region is opened and
/// read again when the region is closed.
///
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Argument order expected by the call below: device ID, number of
    // entries, then the base-pointer, pointer, size, map-type and mapper
    // arrays.
    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // The arrays were already created when the region was opened; only the
    // call arguments are produced here, this time for the end call.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an 'if' clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment under the same condition used to open it.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10689 
/// Emits the runtime call for a standalone target data directive \p D, which
/// must be a 'target enter data', 'target exit data' or 'target update'
/// directive (asserted below).
///
/// \param IfCond If non-null, the whole sequence is emitted through
/// emitIfClause under this condition, with an empty 'else' branch.
/// \param Device If non-null, it is evaluated and cast to a signed 64-bit
/// device ID; otherwise OMP_DEVICEID_UNDEF is passed to the runtime.
///
/// Nothing is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two generators below: TargetThenGen fills it in,
  // ThenGen reads it when it builds the runtime call arguments.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the runtime call that implements the standalone directive.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every directive kind listed below, and any kind not listed at all, is
    // not a standalone target data directive and is treated as unreachable.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays, publish them through InputInfo and
  // MapTypesArray, then run ThenGen either inside an outer task or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, /*IsNonContiguous=*/true);
    // A 'depend' or 'nowait' clause makes the call go through the task-based
    // path below.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MappersArray, Info, {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause nothing at all is emitted on the false path.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10856 
10857 namespace {
10858   /// Kind of parameter in a function with 'declare simd' directive.
10859   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10860   /// Attribute set of the parameter.
10861   struct ParamAttrTy {
10862     ParamKindTy Kind = Vector;
10863     llvm::APSInt StrideOrArg;
10864     llvm::APSInt Alignment;
10865   };
10866 } // namespace
10867 
10868 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10869                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10870   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10871   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10872   // of that clause. The VLEN value must be power of 2.
10873   // In other case the notion of the function`s "characteristic data type" (CDT)
10874   // is used to compute the vector length.
10875   // CDT is defined in the following order:
10876   //   a) For non-void function, the CDT is the return type.
10877   //   b) If the function has any non-uniform, non-linear parameters, then the
10878   //   CDT is the type of the first such parameter.
10879   //   c) If the CDT determined by a) or b) above is struct, union, or class
10880   //   type which is pass-by-value (except for the type that maps to the
10881   //   built-in complex data type), the characteristic data type is int.
10882   //   d) If none of the above three cases is applicable, the CDT is int.
10883   // The VLEN is then determined based on the CDT and the size of vector
10884   // register of that ISA for which current vector version is generated. The
10885   // VLEN is computed using the formula below:
10886   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10887   // where vector register size specified in section 3.2.1 Registers and the
10888   // Stack Frame of original AMD64 ABI document.
10889   QualType RetType = FD->getReturnType();
10890   if (RetType.isNull())
10891     return 0;
10892   ASTContext &C = FD->getASTContext();
10893   QualType CDT;
10894   if (!RetType.isNull() && !RetType->isVoidType()) {
10895     CDT = RetType;
10896   } else {
10897     unsigned Offset = 0;
10898     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10899       if (ParamAttrs[Offset].Kind == Vector)
10900         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10901       ++Offset;
10902     }
10903     if (CDT.isNull()) {
10904       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10905         if (ParamAttrs[I + Offset].Kind == Vector) {
10906           CDT = FD->getParamDecl(I)->getType();
10907           break;
10908         }
10909       }
10910     }
10911   }
10912   if (CDT.isNull())
10913     CDT = C.IntTy;
10914   CDT = CDT->getCanonicalTypeUnqualified();
10915   if (CDT->isRecordType() || CDT->isUnionType())
10916     CDT = C.IntTy;
10917   return C.getTypeSize(CDT);
10918 }
10919 
10920 static void
10921 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10922                            const llvm::APSInt &VLENVal,
10923                            ArrayRef<ParamAttrTy> ParamAttrs,
10924                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10925   struct ISADataTy {
10926     char ISA;
10927     unsigned VecRegSize;
10928   };
10929   ISADataTy ISAData[] = {
10930       {
10931           'b', 128
10932       }, // SSE
10933       {
10934           'c', 256
10935       }, // AVX
10936       {
10937           'd', 256
10938       }, // AVX2
10939       {
10940           'e', 512
10941       }, // AVX512
10942   };
10943   llvm::SmallVector<char, 2> Masked;
10944   switch (State) {
10945   case OMPDeclareSimdDeclAttr::BS_Undefined:
10946     Masked.push_back('N');
10947     Masked.push_back('M');
10948     break;
10949   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10950     Masked.push_back('N');
10951     break;
10952   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10953     Masked.push_back('M');
10954     break;
10955   }
10956   for (char Mask : Masked) {
10957     for (const ISADataTy &Data : ISAData) {
10958       SmallString<256> Buffer;
10959       llvm::raw_svector_ostream Out(Buffer);
10960       Out << "_ZGV" << Data.ISA << Mask;
10961       if (!VLENVal) {
10962         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10963         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10964         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10965       } else {
10966         Out << VLENVal;
10967       }
10968       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10969         switch (ParamAttr.Kind){
10970         case LinearWithVarStride:
10971           Out << 's' << ParamAttr.StrideOrArg;
10972           break;
10973         case Linear:
10974           Out << 'l';
10975           if (ParamAttr.StrideOrArg != 1)
10976             Out << ParamAttr.StrideOrArg;
10977           break;
10978         case Uniform:
10979           Out << 'u';
10980           break;
10981         case Vector:
10982           Out << 'v';
10983           break;
10984         }
10985         if (!!ParamAttr.Alignment)
10986           Out << 'a' << ParamAttr.Alignment;
10987       }
10988       Out << '_' << Fn->getName();
10989       Fn->addFnAttr(Out.str());
10990     }
10991   }
10992 }
10993 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64" (AAVFABI),
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10999 
11000 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11001 ///
11002 /// TODO: Need to implement the behavior for reference marked with a
11003 /// var or no linear modifiers (1.b in the section). For this, we
11004 /// need to extend ParamKindTy to support the linear modifiers.
11005 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11006   QT = QT.getCanonicalType();
11007 
11008   if (QT->isVoidType())
11009     return false;
11010 
11011   if (Kind == ParamKindTy::Uniform)
11012     return false;
11013 
11014   if (Kind == ParamKindTy::Linear)
11015     return false;
11016 
11017   // TODO: Handle linear references with modifiers
11018 
11019   if (Kind == ParamKindTy::LinearWithVarStride)
11020     return false;
11021 
11022   return true;
11023 }
11024 
11025 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11026 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11027   QT = QT.getCanonicalType();
11028   unsigned Size = C.getTypeSize(QT);
11029 
11030   // Only scalars and complex within 16 bytes wide set PVB to true.
11031   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11032     return false;
11033 
11034   if (QT->isFloatingType())
11035     return true;
11036 
11037   if (QT->isIntegerType())
11038     return true;
11039 
11040   if (QT->isPointerType())
11041     return true;
11042 
11043   // TODO: Add support for complex types (section 3.1.2, item 2).
11044 
11045   return false;
11046 }
11047 
11048 /// Computes the lane size (LS) of a return type or of an input parameter,
11049 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11050 /// TODO: Add support for references, section 3.2.1, item 1.
11051 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11052   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11053     QualType PTy = QT.getCanonicalType()->getPointeeType();
11054     if (getAArch64PBV(PTy, C))
11055       return C.getTypeSize(PTy);
11056   }
11057   if (getAArch64PBV(QT, C))
11058     return C.getTypeSize(QT);
11059 
11060   return C.getTypeSize(C.getUIntPtrType());
11061 }
11062 
11063 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11064 // signature of the scalar function, as defined in 3.2.2 of the
11065 // AAVFABI.
11066 static std::tuple<unsigned, unsigned, bool>
11067 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11068   QualType RetType = FD->getReturnType().getCanonicalType();
11069 
11070   ASTContext &C = FD->getASTContext();
11071 
11072   bool OutputBecomesInput = false;
11073 
11074   llvm::SmallVector<unsigned, 8> Sizes;
11075   if (!RetType->isVoidType()) {
11076     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11077     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11078       OutputBecomesInput = true;
11079   }
11080   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11081     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11082     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11083   }
11084 
11085   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11086   // The LS of a function parameter / return value can only be a power
11087   // of 2, starting from 8 bits, up to 128.
11088   assert(std::all_of(Sizes.begin(), Sizes.end(),
11089                      [](unsigned Size) {
11090                        return Size == 8 || Size == 16 || Size == 32 ||
11091                               Size == 64 || Size == 128;
11092                      }) &&
11093          "Invalid size");
11094 
11095   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11096                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11097                          OutputBecomesInput);
11098 }
11099 
11100 /// Mangle the parameter part of the vector function name according to
11101 /// their OpenMP classification. The mangling function is defined in
11102 /// section 3.5 of the AAVFABI.
11103 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11104   SmallString<256> Buffer;
11105   llvm::raw_svector_ostream Out(Buffer);
11106   for (const auto &ParamAttr : ParamAttrs) {
11107     switch (ParamAttr.Kind) {
11108     case LinearWithVarStride:
11109       Out << "ls" << ParamAttr.StrideOrArg;
11110       break;
11111     case Linear:
11112       Out << 'l';
11113       // Don't print the step value if it is not present or if it is
11114       // equal to 1.
11115       if (ParamAttr.StrideOrArg != 1)
11116         Out << ParamAttr.StrideOrArg;
11117       break;
11118     case Uniform:
11119       Out << 'u';
11120       break;
11121     case Vector:
11122       Out << 'v';
11123       break;
11124     }
11125 
11126     if (!!ParamAttr.Alignment)
11127       Out << 'a' << ParamAttr.Alignment;
11128   }
11129 
11130   return std::string(Out.str());
11131 }
11132 
11133 // Function used to add the attribute. The parameter `VLEN` is
11134 // templated to allow the use of "x" when targeting scalable functions
11135 // for SVE.
11136 template <typename T>
11137 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11138                                  char ISA, StringRef ParSeq,
11139                                  StringRef MangledName, bool OutputBecomesInput,
11140                                  llvm::Function *Fn) {
11141   SmallString<256> Buffer;
11142   llvm::raw_svector_ostream Out(Buffer);
11143   Out << Prefix << ISA << LMask << VLEN;
11144   if (OutputBecomesInput)
11145     Out << "v";
11146   Out << ParSeq << "_" << MangledName;
11147   Fn->addFnAttr(Out.str());
11148 }
11149 
11150 // Helper function to generate the Advanced SIMD names depending on
11151 // the value of the NDS when simdlen is not present.
11152 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11153                                       StringRef Prefix, char ISA,
11154                                       StringRef ParSeq, StringRef MangledName,
11155                                       bool OutputBecomesInput,
11156                                       llvm::Function *Fn) {
11157   switch (NDS) {
11158   case 8:
11159     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11160                          OutputBecomesInput, Fn);
11161     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11162                          OutputBecomesInput, Fn);
11163     break;
11164   case 16:
11165     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11166                          OutputBecomesInput, Fn);
11167     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11168                          OutputBecomesInput, Fn);
11169     break;
11170   case 32:
11171     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11172                          OutputBecomesInput, Fn);
11173     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11174                          OutputBecomesInput, Fn);
11175     break;
11176   case 64:
11177   case 128:
11178     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11179                          OutputBecomesInput, Fn);
11180     break;
11181   default:
11182     llvm_unreachable("Scalar type is too wide.");
11183   }
11184 }
11185 
11186 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11187 static void emitAArch64DeclareSimdFunction(
11188     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11189     ArrayRef<ParamAttrTy> ParamAttrs,
11190     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11191     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11192 
11193   // Get basic data for building the vector signature.
11194   const auto Data = getNDSWDS(FD, ParamAttrs);
11195   const unsigned NDS = std::get<0>(Data);
11196   const unsigned WDS = std::get<1>(Data);
11197   const bool OutputBecomesInput = std::get<2>(Data);
11198 
11199   // Check the values provided via `simdlen` by the user.
11200   // 1. A `simdlen(1)` doesn't produce vector signatures,
11201   if (UserVLEN == 1) {
11202     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11203         DiagnosticsEngine::Warning,
11204         "The clause simdlen(1) has no effect when targeting aarch64.");
11205     CGM.getDiags().Report(SLoc, DiagID);
11206     return;
11207   }
11208 
11209   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11210   // Advanced SIMD output.
11211   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11212     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11213         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11214                                     "power of 2 when targeting Advanced SIMD.");
11215     CGM.getDiags().Report(SLoc, DiagID);
11216     return;
11217   }
11218 
11219   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11220   // limits.
11221   if (ISA == 's' && UserVLEN != 0) {
11222     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11223       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11224           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11225                                       "lanes in the architectural constraints "
11226                                       "for SVE (min is 128-bit, max is "
11227                                       "2048-bit, by steps of 128-bit)");
11228       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11229       return;
11230     }
11231   }
11232 
11233   // Sort out parameter sequence.
11234   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11235   StringRef Prefix = "_ZGV";
11236   // Generate simdlen from user input (if any).
11237   if (UserVLEN) {
11238     if (ISA == 's') {
11239       // SVE generates only a masked function.
11240       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11241                            OutputBecomesInput, Fn);
11242     } else {
11243       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11244       // Advanced SIMD generates one or two functions, depending on
11245       // the `[not]inbranch` clause.
11246       switch (State) {
11247       case OMPDeclareSimdDeclAttr::BS_Undefined:
11248         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11249                              OutputBecomesInput, Fn);
11250         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11251                              OutputBecomesInput, Fn);
11252         break;
11253       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11254         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11255                              OutputBecomesInput, Fn);
11256         break;
11257       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11258         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11259                              OutputBecomesInput, Fn);
11260         break;
11261       }
11262     }
11263   } else {
11264     // If no user simdlen is provided, follow the AAVFABI rules for
11265     // generating the vector length.
11266     if (ISA == 's') {
11267       // SVE, section 3.4.1, item 1.
11268       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11269                            OutputBecomesInput, Fn);
11270     } else {
11271       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11272       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11273       // two vector names depending on the use of the clause
11274       // `[not]inbranch`.
11275       switch (State) {
11276       case OMPDeclareSimdDeclAttr::BS_Undefined:
11277         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11278                                   OutputBecomesInput, Fn);
11279         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11280                                   OutputBecomesInput, Fn);
11281         break;
11282       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11283         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11284                                   OutputBecomesInput, Fn);
11285         break;
11286       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11287         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11288                                   OutputBecomesInput, Fn);
11289         break;
11290       }
11291     }
11292   }
11293 }
11294 
11295 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11296                                               llvm::Function *Fn) {
11297   ASTContext &C = CGM.getContext();
11298   FD = FD->getMostRecentDecl();
11299   // Map params to their positions in function decl.
11300   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11301   if (isa<CXXMethodDecl>(FD))
11302     ParamPositions.try_emplace(FD, 0);
11303   unsigned ParamPos = ParamPositions.size();
11304   for (const ParmVarDecl *P : FD->parameters()) {
11305     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11306     ++ParamPos;
11307   }
11308   while (FD) {
11309     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11310       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11311       // Mark uniform parameters.
11312       for (const Expr *E : Attr->uniforms()) {
11313         E = E->IgnoreParenImpCasts();
11314         unsigned Pos;
11315         if (isa<CXXThisExpr>(E)) {
11316           Pos = ParamPositions[FD];
11317         } else {
11318           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11319                                 ->getCanonicalDecl();
11320           Pos = ParamPositions[PVD];
11321         }
11322         ParamAttrs[Pos].Kind = Uniform;
11323       }
11324       // Get alignment info.
11325       auto NI = Attr->alignments_begin();
11326       for (const Expr *E : Attr->aligneds()) {
11327         E = E->IgnoreParenImpCasts();
11328         unsigned Pos;
11329         QualType ParmTy;
11330         if (isa<CXXThisExpr>(E)) {
11331           Pos = ParamPositions[FD];
11332           ParmTy = E->getType();
11333         } else {
11334           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11335                                 ->getCanonicalDecl();
11336           Pos = ParamPositions[PVD];
11337           ParmTy = PVD->getType();
11338         }
11339         ParamAttrs[Pos].Alignment =
11340             (*NI)
11341                 ? (*NI)->EvaluateKnownConstInt(C)
11342                 : llvm::APSInt::getUnsigned(
11343                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11344                           .getQuantity());
11345         ++NI;
11346       }
11347       // Mark linear parameters.
11348       auto SI = Attr->steps_begin();
11349       auto MI = Attr->modifiers_begin();
11350       for (const Expr *E : Attr->linears()) {
11351         E = E->IgnoreParenImpCasts();
11352         unsigned Pos;
11353         // Rescaling factor needed to compute the linear parameter
11354         // value in the mangled name.
11355         unsigned PtrRescalingFactor = 1;
11356         if (isa<CXXThisExpr>(E)) {
11357           Pos = ParamPositions[FD];
11358         } else {
11359           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11360                                 ->getCanonicalDecl();
11361           Pos = ParamPositions[PVD];
11362           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11363             PtrRescalingFactor = CGM.getContext()
11364                                      .getTypeSizeInChars(P->getPointeeType())
11365                                      .getQuantity();
11366         }
11367         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11368         ParamAttr.Kind = Linear;
11369         // Assuming a stride of 1, for `linear` without modifiers.
11370         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11371         if (*SI) {
11372           Expr::EvalResult Result;
11373           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11374             if (const auto *DRE =
11375                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11376               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11377                 ParamAttr.Kind = LinearWithVarStride;
11378                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11379                     ParamPositions[StridePVD->getCanonicalDecl()]);
11380               }
11381             }
11382           } else {
11383             ParamAttr.StrideOrArg = Result.Val.getInt();
11384           }
11385         }
11386         // If we are using a linear clause on a pointer, we need to
11387         // rescale the value of linear_step with the byte size of the
11388         // pointee type.
11389         if (Linear == ParamAttr.Kind)
11390           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11391         ++SI;
11392         ++MI;
11393       }
11394       llvm::APSInt VLENVal;
11395       SourceLocation ExprLoc;
11396       const Expr *VLENExpr = Attr->getSimdlen();
11397       if (VLENExpr) {
11398         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11399         ExprLoc = VLENExpr->getExprLoc();
11400       }
11401       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11402       if (CGM.getTriple().isX86()) {
11403         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11404       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11405         unsigned VLEN = VLENVal.getExtValue();
11406         StringRef MangledName = Fn->getName();
11407         if (CGM.getTarget().hasFeature("sve"))
11408           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11409                                          MangledName, 's', 128, Fn, ExprLoc);
11410         if (CGM.getTarget().hasFeature("neon"))
11411           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11412                                          MangledName, 'n', 128, Fn, ExprLoc);
11413       }
11414     }
11415     FD = FD->getPreviousDecl();
11416   }
11417 }
11418 
11419 namespace {
11420 /// Cleanup action for doacross support.
11421 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11422 public:
11423   static const int DoacrossFinArgs = 2;
11424 
11425 private:
11426   llvm::FunctionCallee RTLFn;
11427   llvm::Value *Args[DoacrossFinArgs];
11428 
11429 public:
11430   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11431                     ArrayRef<llvm::Value *> CallArgs)
11432       : RTLFn(RTLFn) {
11433     assert(CallArgs.size() == DoacrossFinArgs);
11434     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11435   }
11436   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11437     if (!CGF.HaveInsertPoint())
11438       return;
11439     CGF.EmitRuntimeCall(RTLFn, Args);
11440   }
11441 };
11442 } // namespace
11443 
/// Emits the setup of a doacross loop nest: builds an array of kmp_dim
/// records (one per entry of \p NumIterations), calls __kmpc_doacross_init
/// with it and pushes a cleanup that calls __kmpc_doacross_fini.
///
/// \param CGF Function being emitted; nothing is emitted when it has no
/// insertion point.
/// \param D Loop directive whose begin/end locations are used for the
/// runtime calls.
/// \param NumIterations One iteration-count expression per dimension.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is created on first use and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // Array type kmp_dim[NumIterations.size()].
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // The whole array is null-initialized first; only the upper and stride
  // fields are stored explicitly below, so the lower field stays zero.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices inside kmp_dim, in declaration order.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // The matching __kmpc_doacross_fini call is emitted by a cleanup pushed as
  // NormalAndEHCleanup; its arguments use the end location of the directive.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11514 
11515 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11516                                           const OMPDependClause *C) {
11517   QualType Int64Ty =
11518       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11519   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11520   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11521       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11522   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11523   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11524     const Expr *CounterVal = C->getLoopData(I);
11525     assert(CounterVal);
11526     llvm::Value *CntVal = CGF.EmitScalarConversion(
11527         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11528         CounterVal->getExprLoc());
11529     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11530                           /*Volatile=*/false, Int64Ty);
11531   }
11532   llvm::Value *Args[] = {
11533       emitUpdateLocation(CGF, C->getBeginLoc()),
11534       getThreadID(CGF, C->getBeginLoc()),
11535       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11536   llvm::FunctionCallee RTLFn;
11537   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11538     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11539                                                   OMPRTL___kmpc_doacross_post);
11540   } else {
11541     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11542     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11543                                                   OMPRTL___kmpc_doacross_wait);
11544   }
11545   CGF.EmitRuntimeCall(RTLFn, Args);
11546 }
11547 
11548 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11549                                llvm::FunctionCallee Callee,
11550                                ArrayRef<llvm::Value *> Args) const {
11551   assert(Loc.isValid() && "Outlined function call location must be valid.");
11552   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11553 
11554   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11555     if (Fn->doesNotThrow()) {
11556       CGF.EmitNounwindRuntimeCall(Fn, Args);
11557       return;
11558     }
11559   }
11560   CGF.EmitRuntimeCall(Callee, Args);
11561 }
11562 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc. This implementation simply forwards to
/// emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11568 
11569 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11570   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11571     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11572       HasEmittedDeclareTargetRegion = true;
11573 }
11574 
/// Returns the address of \p NativeParam as a local variable of \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11580 
/// Returns the OpenMP-specific address for local variable \p VD, or an
/// invalid address when none applies.
///
/// Two sources are consulted: the untied-task map registered for the current
/// function, and an OMPAllocateDeclAttr on the canonical declaration. For an
/// allocatable declaration the storage is obtained with __kmpc_alloc and a
/// cleanup calling __kmpc_free is pushed.
///
/// \param CGF Function being emitted.
/// \param VD Variable to look up; may be null, in which case an invalid
/// address is returned.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Addresses recorded for VD by an enclosing untied task, if any.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to the declared alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Size is only known at run time; emit the rounding arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant size; round it up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Call __kmpc_alloc(ThreadID, Size, Allocator); the result is named
    // after the variable with a ".void.addr" suffix.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // Inside an untied task the allocated pointer is also stored into the
    // slot recorded for the variable.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, address as void *, allocator as void *).
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the real address recorded for an untied task; otherwise use the
    // freshly allocated pointer with the declared alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For untied tasks, let the enclosing OpenMP region (if any) emit its
    // untied switch after the allocation.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  // No allocate attribute: only the untied-task address (possibly invalid).
  return UntiedAddr;
}
11683 
11684 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11685                                              const VarDecl *VD) const {
11686   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11687   if (It == FunctionToUntiedTaskStackMap.end())
11688     return false;
11689   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11690 }
11691 
11692 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11693     CodeGenModule &CGM, const OMPLoopDirective &S)
11694     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11695   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11696   if (!NeedToPush)
11697     return;
11698   NontemporalDeclsSet &DS =
11699       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11700   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11701     for (const Stmt *Ref : C->private_refs()) {
11702       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11703       const ValueDecl *VD;
11704       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11705         VD = DRE->getDecl();
11706       } else {
11707         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11708         assert((ME->isImplicitCXXThis() ||
11709                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11710                "Expected member of current class.");
11711         VD = ME->getMemberDecl();
11712       }
11713       DS.insert(VD);
11714     }
11715   }
11716 }
11717 
11718 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11719   if (!NeedToPush)
11720     return;
11721   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11722 }
11723 
11724 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11725     CodeGenFunction &CGF,
11726     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11727                          std::pair<Address, Address>> &LocalVars)
11728     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11729   if (!NeedToPush)
11730     return;
11731   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11732       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11733   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11734 }
11735 
11736 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11737   if (!NeedToPush)
11738     return;
11739   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11740 }
11741 
11742 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11743   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11744 
11745   return llvm::any_of(
11746       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11747       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11748 }
11749 
11750 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11751     const OMPExecutableDirective &S,
11752     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11753     const {
11754   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11755   // Vars in target/task regions must be excluded completely.
11756   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11757       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11758     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11759     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11760     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11761     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11762       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11763         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11764     }
11765   }
11766   // Exclude vars in private clauses.
11767   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11768     for (const Expr *Ref : C->varlists()) {
11769       if (!Ref->getType()->isScalarType())
11770         continue;
11771       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11772       if (!DRE)
11773         continue;
11774       NeedToCheckForLPCs.insert(DRE->getDecl());
11775     }
11776   }
11777   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11778     for (const Expr *Ref : C->varlists()) {
11779       if (!Ref->getType()->isScalarType())
11780         continue;
11781       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11782       if (!DRE)
11783         continue;
11784       NeedToCheckForLPCs.insert(DRE->getDecl());
11785     }
11786   }
11787   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11788     for (const Expr *Ref : C->varlists()) {
11789       if (!Ref->getType()->isScalarType())
11790         continue;
11791       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11792       if (!DRE)
11793         continue;
11794       NeedToCheckForLPCs.insert(DRE->getDecl());
11795     }
11796   }
11797   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11798     for (const Expr *Ref : C->varlists()) {
11799       if (!Ref->getType()->isScalarType())
11800         continue;
11801       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11802       if (!DRE)
11803         continue;
11804       NeedToCheckForLPCs.insert(DRE->getDecl());
11805     }
11806   }
11807   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11808     for (const Expr *Ref : C->varlists()) {
11809       if (!Ref->getType()->isScalarType())
11810         continue;
11811       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11812       if (!DRE)
11813         continue;
11814       NeedToCheckForLPCs.insert(DRE->getDecl());
11815     }
11816   }
11817   for (const Decl *VD : NeedToCheckForLPCs) {
11818     for (const LastprivateConditionalData &Data :
11819          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11820       if (Data.DeclToUniqueName.count(VD) > 0) {
11821         if (!Data.Disabled)
11822           NeedToAddForLPCsAsDisabled.insert(VD);
11823         break;
11824       }
11825     }
11826   }
11827 }
11828 
11829 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11830     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11831     : CGM(CGF.CGM),
11832       Action((CGM.getLangOpts().OpenMP >= 50 &&
11833               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11834                            [](const OMPLastprivateClause *C) {
11835                              return C->getKind() ==
11836                                     OMPC_LASTPRIVATE_conditional;
11837                            }))
11838                  ? ActionToDo::PushAsLastprivateConditional
11839                  : ActionToDo::DoNotPush) {
11840   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11841   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11842     return;
11843   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11844          "Expected a push action.");
11845   LastprivateConditionalData &Data =
11846       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11847   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11848     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11849       continue;
11850 
11851     for (const Expr *Ref : C->varlists()) {
11852       Data.DeclToUniqueName.insert(std::make_pair(
11853           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11854           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11855     }
11856   }
11857   Data.IVLVal = IVLVal;
11858   Data.Fn = CGF.CurFn;
11859 }
11860 
11861 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11862     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11863     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11864   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11865   if (CGM.getLangOpts().OpenMP < 50)
11866     return;
11867   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11868   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11869   if (!NeedToAddForLPCsAsDisabled.empty()) {
11870     Action = ActionToDo::DisableLastprivateConditional;
11871     LastprivateConditionalData &Data =
11872         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11873     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11874       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11875     Data.Fn = CGF.CurFn;
11876     Data.Disabled = true;
11877   }
11878 }
11879 
/// Creates the RAII object through the two-argument constructor, i.e. the
/// variant that may push a disabled entry for directive \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11885 
11886 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11887   if (CGM.getLangOpts().OpenMP < 50)
11888     return;
11889   if (Action == ActionToDo::DisableLastprivateConditional) {
11890     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11891            "Expected list of disabled private vars.");
11892     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11893   }
11894   if (Action == ActionToDo::PushAsLastprivateConditional) {
11895     assert(
11896         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11897         "Expected list of lastprivate conditional vars.");
11898     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11899   }
11900 }
11901 
11902 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11903                                                         const VarDecl *VD) {
11904   ASTContext &C = CGM.getContext();
11905   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11906   if (I == LastprivateConditionalToTypes.end())
11907     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11908   QualType NewType;
11909   const FieldDecl *VDField;
11910   const FieldDecl *FiredField;
11911   LValue BaseLVal;
11912   auto VI = I->getSecond().find(VD);
11913   if (VI == I->getSecond().end()) {
11914     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11915     RD->startDefinition();
11916     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11917     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11918     RD->completeDefinition();
11919     NewType = C.getRecordType(RD);
11920     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11921     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11922     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11923   } else {
11924     NewType = std::get<0>(VI->getSecond());
11925     VDField = std::get<1>(VI->getSecond());
11926     FiredField = std::get<2>(VI->getSecond());
11927     BaseLVal = std::get<3>(VI->getSecond());
11928   }
11929   LValue FiredLVal =
11930       CGF.EmitLValueForField(BaseLVal, FiredField);
11931   CGF.EmitStoreOfScalar(
11932       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11933       FiredLVal);
11934   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11935 }
11936 
11937 namespace {
11938 /// Checks if the lastprivate conditional variable is referenced in LHS.
11939 class LastprivateConditionalRefChecker final
11940     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11941   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11942   const Expr *FoundE = nullptr;
11943   const Decl *FoundD = nullptr;
11944   StringRef UniqueDeclName;
11945   LValue IVLVal;
11946   llvm::Function *FoundFn = nullptr;
11947   SourceLocation Loc;
11948 
11949 public:
11950   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11951     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11952          llvm::reverse(LPM)) {
11953       auto It = D.DeclToUniqueName.find(E->getDecl());
11954       if (It == D.DeclToUniqueName.end())
11955         continue;
11956       if (D.Disabled)
11957         return false;
11958       FoundE = E;
11959       FoundD = E->getDecl()->getCanonicalDecl();
11960       UniqueDeclName = It->second;
11961       IVLVal = D.IVLVal;
11962       FoundFn = D.Fn;
11963       break;
11964     }
11965     return FoundE == E;
11966   }
11967   bool VisitMemberExpr(const MemberExpr *E) {
11968     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11969       return false;
11970     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11971          llvm::reverse(LPM)) {
11972       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11973       if (It == D.DeclToUniqueName.end())
11974         continue;
11975       if (D.Disabled)
11976         return false;
11977       FoundE = E;
11978       FoundD = E->getMemberDecl()->getCanonicalDecl();
11979       UniqueDeclName = It->second;
11980       IVLVal = D.IVLVal;
11981       FoundFn = D.Fn;
11982       break;
11983     }
11984     return FoundE == E;
11985   }
11986   bool VisitStmt(const Stmt *S) {
11987     for (const Stmt *Child : S->children()) {
11988       if (!Child)
11989         continue;
11990       if (const auto *E = dyn_cast<Expr>(Child))
11991         if (!E->isGLValue())
11992           continue;
11993       if (Visit(Child))
11994         return true;
11995     }
11996     return false;
11997   }
11998   explicit LastprivateConditionalRefChecker(
11999       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12000       : LPM(LPM) {}
12001   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12002   getFoundData() const {
12003     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12004   }
12005 };
12006 } // namespace
12007 
12008 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12009                                                        LValue IVLVal,
12010                                                        StringRef UniqueDeclName,
12011                                                        LValue LVal,
12012                                                        SourceLocation Loc) {
12013   // Last updated loop counter for the lastprivate conditional var.
12014   // int<xx> last_iv = 0;
12015   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12016   llvm::Constant *LastIV =
12017       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12018   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12019       IVLVal.getAlignment().getAsAlign());
12020   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12021 
12022   // Last value of the lastprivate conditional.
12023   // decltype(priv_a) last_a;
12024   llvm::Constant *Last = getOrCreateInternalVariable(
12025       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12026   cast<llvm::GlobalVariable>(Last)->setAlignment(
12027       LVal.getAlignment().getAsAlign());
12028   LValue LastLVal =
12029       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12030 
12031   // Global loop counter. Required to handle inner parallel-for regions.
12032   // iv
12033   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12034 
12035   // #pragma omp critical(a)
12036   // if (last_iv <= iv) {
12037   //   last_iv = iv;
12038   //   last_a = priv_a;
12039   // }
12040   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12041                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12042     Action.Enter(CGF);
12043     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12044     // (last_iv <= iv) ? Check if the variable is updated and store new
12045     // value in global var.
12046     llvm::Value *CmpRes;
12047     if (IVLVal.getType()->isSignedIntegerType()) {
12048       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12049     } else {
12050       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12051              "Loop iteration variable must be integer.");
12052       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12053     }
12054     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12055     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12056     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12057     // {
12058     CGF.EmitBlock(ThenBB);
12059 
12060     //   last_iv = iv;
12061     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12062 
12063     //   last_a = priv_a;
12064     switch (CGF.getEvaluationKind(LVal.getType())) {
12065     case TEK_Scalar: {
12066       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12067       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12068       break;
12069     }
12070     case TEK_Complex: {
12071       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12072       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12073       break;
12074     }
12075     case TEK_Aggregate:
12076       llvm_unreachable(
12077           "Aggregates are not supported in lastprivate conditional.");
12078     }
12079     // }
12080     CGF.EmitBranch(ExitBB);
12081     // There is no need to emit line number for unconditional branch.
12082     (void)ApplyDebugLocation::CreateEmpty(CGF);
12083     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12084   };
12085 
12086   if (CGM.getLangOpts().OpenMPSimd) {
12087     // Do not emit as a critical region as no parallel region could be emitted.
12088     RegionCodeGenTy ThenRCG(CodeGen);
12089     ThenRCG(CGF);
12090   } else {
12091     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12092   }
12093 }
12094 
12095 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12096                                                          const Expr *LHS) {
12097   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12098     return;
12099   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12100   if (!Checker.Visit(LHS))
12101     return;
12102   const Expr *FoundE;
12103   const Decl *FoundD;
12104   StringRef UniqueDeclName;
12105   LValue IVLVal;
12106   llvm::Function *FoundFn;
12107   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12108       Checker.getFoundData();
12109   if (FoundFn != CGF.CurFn) {
12110     // Special codegen for inner parallel regions.
12111     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12112     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12113     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12114            "Lastprivate conditional is not found in outer region.");
12115     QualType StructTy = std::get<0>(It->getSecond());
12116     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12117     LValue PrivLVal = CGF.EmitLValue(FoundE);
12118     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12119         PrivLVal.getAddress(CGF),
12120         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12121     LValue BaseLVal =
12122         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12123     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12124     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12125                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12126                         FiredLVal, llvm::AtomicOrdering::Unordered,
12127                         /*IsVolatile=*/true, /*isInit=*/false);
12128     return;
12129   }
12130 
12131   // Private address of the lastprivate conditional in the current context.
12132   // priv_a
12133   LValue LVal = CGF.EmitLValue(FoundE);
12134   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12135                                    FoundE->getExprLoc());
12136 }
12137 
12138 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12139     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12140     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12141   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12142     return;
12143   auto Range = llvm::reverse(LastprivateConditionalStack);
12144   auto It = llvm::find_if(
12145       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12146   if (It == Range.end() || It->Fn != CGF.CurFn)
12147     return;
12148   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12149   assert(LPCI != LastprivateConditionalToTypes.end() &&
12150          "Lastprivates must be registered already.");
12151   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12152   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12153   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12154   for (const auto &Pair : It->DeclToUniqueName) {
12155     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12156     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12157       continue;
12158     auto I = LPCI->getSecond().find(Pair.first);
12159     assert(I != LPCI->getSecond().end() &&
12160            "Lastprivate must be rehistered already.");
12161     // bool Cmp = priv_a.Fired != 0;
12162     LValue BaseLVal = std::get<3>(I->getSecond());
12163     LValue FiredLVal =
12164         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12165     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12166     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12167     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12168     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12169     // if (Cmp) {
12170     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12171     CGF.EmitBlock(ThenBB);
12172     Address Addr = CGF.GetAddrOfLocalVar(VD);
12173     LValue LVal;
12174     if (VD->getType()->isReferenceType())
12175       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12176                                            AlignmentSource::Decl);
12177     else
12178       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12179                                 AlignmentSource::Decl);
12180     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12181                                      D.getBeginLoc());
12182     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12183     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12184     // }
12185   }
12186 }
12187 
12188 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12189     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12190     SourceLocation Loc) {
12191   if (CGF.getLangOpts().OpenMP < 50)
12192     return;
12193   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12194   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12195          "Unknown lastprivate conditional variable.");
12196   StringRef UniqueName = It->second;
12197   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12198   // The variable was not updated in the region - exit.
12199   if (!GV)
12200     return;
12201   LValue LPLVal = CGF.MakeAddrLValue(
12202       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12203   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12204   CGF.EmitStoreOfScalar(Res, PrivLVal);
12205 }
12206 
/// CGOpenMPSIMDRuntime implementation of emitParallelOutlinedFunction().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12212 
/// CGOpenMPSIMDRuntime implementation of emitTeamsOutlinedFunction().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12218 
/// CGOpenMPSIMDRuntime implementation of emitTaskOutlinedFunction().
/// Not supported in SIMD-only mode: all parameters are ignored (including the
/// \p NumberOfParts reference, which is never written) and any call reaches
/// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12226 
/// CGOpenMPSIMDRuntime implementation of emitParallelCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12234 
/// CGOpenMPSIMDRuntime implementation of emitCriticalRegion().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12241 
/// CGOpenMPSIMDRuntime implementation of emitMasterRegion().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12247 
/// CGOpenMPSIMDRuntime implementation of emitTaskyieldCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12252 
/// CGOpenMPSIMDRuntime implementation of emitTaskgroupRegion().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12258 
/// CGOpenMPSIMDRuntime implementation of emitSingleRegion().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12266 
/// CGOpenMPSIMDRuntime implementation of emitOrderedRegion().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12273 
/// CGOpenMPSIMDRuntime implementation of emitBarrierCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12281 
/// CGOpenMPSIMDRuntime implementation of emitForDispatchInit().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12288 
/// CGOpenMPSIMDRuntime implementation of emitForStaticInit().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12294 
/// CGOpenMPSIMDRuntime implementation of emitDistributeStaticInit().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12300 
/// CGOpenMPSIMDRuntime implementation of emitForOrderedIterationEnd().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12307 
/// CGOpenMPSIMDRuntime implementation of emitForStaticFinish().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12313 
/// CGOpenMPSIMDRuntime implementation of emitForNext().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12321 
/// CGOpenMPSIMDRuntime implementation of emitNumThreadsClause().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12327 
/// CGOpenMPSIMDRuntime implementation of emitProcBindClause().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12333 
/// CGOpenMPSIMDRuntime implementation of getAddrOfThreadPrivate().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12340 
/// CGOpenMPSIMDRuntime implementation of emitThreadPrivateVarDefinition().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12346 
/// CGOpenMPSIMDRuntime implementation of getAddrOfArtificialThreadPrivate().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12351 
/// CGOpenMPSIMDRuntime implementation of emitFlush().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12358 
/// CGOpenMPSIMDRuntime implementation of emitTaskCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12367 
/// CGOpenMPSIMDRuntime implementation of emitTaskLoopCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12374 
/// CGOpenMPSIMDRuntime implementation of emitReduction().
/// Asserts that \p Options.SimpleReduction is set and then forwards every
/// argument unchanged to CGOpenMPRuntime::emitReduction().
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only the simple form of reduction is expected to reach this runtime.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12383 
/// CGOpenMPSIMDRuntime implementation of emitTaskReductionInit().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12389 
/// CGOpenMPSIMDRuntime implementation of emitTaskReductionFini().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12395 
/// CGOpenMPSIMDRuntime implementation of emitTaskReductionFixups().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12402 
/// CGOpenMPSIMDRuntime implementation of getTaskReductionItem().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12409 
/// CGOpenMPSIMDRuntime implementation of emitTaskwaitCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12414 
/// CGOpenMPSIMDRuntime implementation of emitCancellationPointCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12420 
/// CGOpenMPSIMDRuntime implementation of emitCancelCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12426 
/// CGOpenMPSIMDRuntime implementation of emitTargetOutlinedFunction().
/// Not supported in SIMD-only mode: all parameters are ignored (the
/// \p OutlinedFn and \p OutlinedFnID references are never written) and any
/// call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12433 
/// CGOpenMPSIMDRuntime implementation of emitTargetCall().
/// Not supported in SIMD-only mode: all parameters are ignored (the
/// \p SizeEmitter callback is never invoked) and any call reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12443 
/// CGOpenMPSIMDRuntime implementation of emitTargetFunctions().
/// Not supported in SIMD-only mode: \p GD is ignored and any call reaches
/// llvm_unreachable, so no result is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12447 
/// CGOpenMPSIMDRuntime implementation of emitTargetGlobalVariable().
/// Not supported in SIMD-only mode: \p GD is ignored and any call reaches
/// llvm_unreachable, so no result is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12451 
/// CGOpenMPSIMDRuntime implementation of emitTargetGlobal().
/// Unlike its neighbouring SIMD-only stubs this one may be called: it always
/// returns false without inspecting \p GD.
/// NOTE(review): presumably false tells the caller that the declaration was
/// not handled here - confirm against the contract documented in the header.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12455 
/// CGOpenMPSIMDRuntime implementation of emitTeamsCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12463 
/// CGOpenMPSIMDRuntime implementation of emitNumTeamsClause().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12470 
/// CGOpenMPSIMDRuntime implementation of emitTargetDataCalls().
/// Not supported in SIMD-only mode: all parameters are ignored (\p Info is
/// never written) and any call reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12476 
/// CGOpenMPSIMDRuntime implementation of emitTargetDataStandAloneCall().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12482 
/// CGOpenMPSIMDRuntime implementation of emitDoacrossInit().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12488 
/// CGOpenMPSIMDRuntime implementation of emitDoacrossOrdered().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12493 
/// CGOpenMPSIMDRuntime implementation of translateParameter().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no declaration is ever returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12499 
/// CGOpenMPSIMDRuntime implementation of getParameterAddress().
/// Not supported in SIMD-only mode: all parameters are ignored and any call
/// reaches llvm_unreachable, so no address is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12506